dlaqps.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346
  1. /* dlaqps.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Table of constant values */
  14. static integer c__1 = 1;
  15. static doublereal c_b8 = -1.;
  16. static doublereal c_b9 = 1.;
  17. static doublereal c_b16 = 0.;
  18. /* Subroutine */ int _starpu_dlaqps_(integer *m, integer *n, integer *offset, integer
  19. *nb, integer *kb, doublereal *a, integer *lda, integer *jpvt,
  20. doublereal *tau, doublereal *vn1, doublereal *vn2, doublereal *auxv,
  21. doublereal *f, integer *ldf)
  22. {
  23. /* System generated locals */
  24. integer a_dim1, a_offset, f_dim1, f_offset, i__1, i__2;
  25. doublereal d__1, d__2;
  26. /* Builtin functions */
  27. double sqrt(doublereal);
  28. integer i_dnnt(doublereal *);
  29. /* Local variables */
  30. integer j, k, rk;
  31. doublereal akk;
  32. integer pvt;
  33. doublereal temp;
  34. extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *);
  35. doublereal temp2, tol3z;
  36. extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *,
  37. integer *, doublereal *, doublereal *, integer *, doublereal *,
  38. integer *, doublereal *, doublereal *, integer *),
  39. _starpu_dgemv_(char *, integer *, integer *, doublereal *, doublereal *,
  40. integer *, doublereal *, integer *, doublereal *, doublereal *,
  41. integer *);
  42. integer itemp;
  43. extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *,
  44. doublereal *, integer *);
  45. extern doublereal _starpu_dlamch_(char *);
  46. extern integer _starpu_idamax_(integer *, doublereal *, integer *);
  47. extern /* Subroutine */ int _starpu_dlarfp_(integer *, doublereal *, doublereal *,
  48. integer *, doublereal *);
  49. integer lsticc, lastrk;
  50. /* -- LAPACK auxiliary routine (version 3.2) -- */
  51. /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  52. /* November 2006 */
  53. /* .. Scalar Arguments .. */
  54. /* .. */
  55. /* .. Array Arguments .. */
  56. /* .. */
  57. /* Purpose */
  58. /* ======= */
  59. /* DLAQPS computes a step of QR factorization with column pivoting */
  60. /* of a real M-by-N matrix A by using Blas-3. It tries to factorize */
  61. /* NB columns from A starting from the row OFFSET+1, and updates all */
  62. /* of the matrix with Blas-3 xGEMM. */
  63. /* In some cases, due to catastrophic cancellations, it cannot */
  64. /* factorize NB columns. Hence, the actual number of factorized */
  65. /* columns is returned in KB. */
  66. /* Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. */
  67. /* Arguments */
  68. /* ========= */
  69. /* M (input) INTEGER */
  70. /* The number of rows of the matrix A. M >= 0. */
  71. /* N (input) INTEGER */
  72. /* The number of columns of the matrix A. N >= 0 */
  73. /* OFFSET (input) INTEGER */
  74. /* The number of rows of A that have been factorized in */
  75. /* previous steps. */
  76. /* NB (input) INTEGER */
  77. /* The number of columns to factorize. */
  78. /* KB (output) INTEGER */
  79. /* The number of columns actually factorized. */
  80. /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
  81. /* On entry, the M-by-N matrix A. */
  82. /* On exit, block A(OFFSET+1:M,1:KB) is the triangular */
  83. /* factor obtained and block A(1:OFFSET,1:N) has been */
  84. /* accordingly pivoted, but no factorized. */
  85. /* The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has */
  86. /* been updated. */
  87. /* LDA (input) INTEGER */
  88. /* The leading dimension of the array A. LDA >= max(1,M). */
  89. /* JPVT (input/output) INTEGER array, dimension (N) */
  90. /* JPVT(I) = K <==> Column K of the full matrix A has been */
  91. /* permuted into position I in AP. */
  92. /* TAU (output) DOUBLE PRECISION array, dimension (KB) */
  93. /* The scalar factors of the elementary reflectors. */
  94. /* VN1 (input/output) DOUBLE PRECISION array, dimension (N) */
  95. /* The vector with the partial column norms. */
  96. /* VN2 (input/output) DOUBLE PRECISION array, dimension (N) */
  97. /* The vector with the exact column norms. */
  98. /* AUXV (input/output) DOUBLE PRECISION array, dimension (NB) */
  99. /* Auxiliar vector. */
  100. /* F (input/output) DOUBLE PRECISION array, dimension (LDF,NB) */
  101. /* Matrix F' = L*Y'*A. */
  102. /* LDF (input) INTEGER */
  103. /* The leading dimension of the array F. LDF >= max(1,N). */
  104. /* Further Details */
  105. /* =============== */
  106. /* Based on contributions by */
  107. /* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */
  108. /* X. Sun, Computer Science Dept., Duke University, USA */
  109. /* Partial column norm updating strategy modified by */
  110. /* Z. Drmac and Z. Bujanovic, Dept. of Mathematics, */
  111. /* University of Zagreb, Croatia. */
  112. /* June 2006. */
  113. /* For more details see LAPACK Working Note 176. */
  114. /* ===================================================================== */
  115. /* .. Parameters .. */
  116. /* .. */
  117. /* .. Local Scalars .. */
  118. /* .. */
  119. /* .. External Subroutines .. */
  120. /* .. */
  121. /* .. Intrinsic Functions .. */
  122. /* .. */
  123. /* .. External Functions .. */
  124. /* .. */
  125. /* .. Executable Statements .. */
  126. /* Parameter adjustments */
  127. a_dim1 = *lda;
  128. a_offset = 1 + a_dim1;
  129. a -= a_offset;
  130. --jpvt;
  131. --tau;
  132. --vn1;
  133. --vn2;
  134. --auxv;
  135. f_dim1 = *ldf;
  136. f_offset = 1 + f_dim1;
  137. f -= f_offset;
  138. /* Function Body */
  139. /* Computing MIN */
  140. i__1 = *m, i__2 = *n + *offset;
  141. lastrk = min(i__1,i__2);
  142. lsticc = 0;
  143. k = 0;
  144. tol3z = sqrt(_starpu_dlamch_("Epsilon"));
  145. /* Beginning of while loop. */
  146. L10:
  147. if (k < *nb && lsticc == 0) {
  148. ++k;
  149. rk = *offset + k;
  150. /* Determine ith pivot column and swap if necessary */
  151. i__1 = *n - k + 1;
  152. pvt = k - 1 + _starpu_idamax_(&i__1, &vn1[k], &c__1);
  153. if (pvt != k) {
  154. _starpu_dswap_(m, &a[pvt * a_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1);
  155. i__1 = k - 1;
  156. _starpu_dswap_(&i__1, &f[pvt + f_dim1], ldf, &f[k + f_dim1], ldf);
  157. itemp = jpvt[pvt];
  158. jpvt[pvt] = jpvt[k];
  159. jpvt[k] = itemp;
  160. vn1[pvt] = vn1[k];
  161. vn2[pvt] = vn2[k];
  162. }
  163. /* Apply previous Householder reflectors to column K: */
  164. /* A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)'. */
  165. if (k > 1) {
  166. i__1 = *m - rk + 1;
  167. i__2 = k - 1;
  168. _starpu_dgemv_("No transpose", &i__1, &i__2, &c_b8, &a[rk + a_dim1], lda,
  169. &f[k + f_dim1], ldf, &c_b9, &a[rk + k * a_dim1], &c__1);
  170. }
  171. /* Generate elementary reflector H(k). */
  172. if (rk < *m) {
  173. i__1 = *m - rk + 1;
  174. _starpu_dlarfp_(&i__1, &a[rk + k * a_dim1], &a[rk + 1 + k * a_dim1], &
  175. c__1, &tau[k]);
  176. } else {
  177. _starpu_dlarfp_(&c__1, &a[rk + k * a_dim1], &a[rk + k * a_dim1], &c__1, &
  178. tau[k]);
  179. }
  180. akk = a[rk + k * a_dim1];
  181. a[rk + k * a_dim1] = 1.;
  182. /* Compute Kth column of F: */
  183. /* Compute F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)'*A(RK:M,K). */
  184. if (k < *n) {
  185. i__1 = *m - rk + 1;
  186. i__2 = *n - k;
  187. _starpu_dgemv_("Transpose", &i__1, &i__2, &tau[k], &a[rk + (k + 1) *
  188. a_dim1], lda, &a[rk + k * a_dim1], &c__1, &c_b16, &f[k +
  189. 1 + k * f_dim1], &c__1);
  190. }
  191. /* Padding F(1:K,K) with zeros. */
  192. i__1 = k;
  193. for (j = 1; j <= i__1; ++j) {
  194. f[j + k * f_dim1] = 0.;
  195. /* L20: */
  196. }
  197. /* Incremental updating of F: */
  198. /* F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)' */
  199. /* *A(RK:M,K). */
  200. if (k > 1) {
  201. i__1 = *m - rk + 1;
  202. i__2 = k - 1;
  203. d__1 = -tau[k];
  204. _starpu_dgemv_("Transpose", &i__1, &i__2, &d__1, &a[rk + a_dim1], lda, &a[
  205. rk + k * a_dim1], &c__1, &c_b16, &auxv[1], &c__1);
  206. i__1 = k - 1;
  207. _starpu_dgemv_("No transpose", n, &i__1, &c_b9, &f[f_dim1 + 1], ldf, &
  208. auxv[1], &c__1, &c_b9, &f[k * f_dim1 + 1], &c__1);
  209. }
  210. /* Update the current row of A: */
  211. /* A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)'. */
  212. if (k < *n) {
  213. i__1 = *n - k;
  214. _starpu_dgemv_("No transpose", &i__1, &k, &c_b8, &f[k + 1 + f_dim1], ldf,
  215. &a[rk + a_dim1], lda, &c_b9, &a[rk + (k + 1) * a_dim1],
  216. lda);
  217. }
  218. /* Update partial column norms. */
  219. if (rk < lastrk) {
  220. i__1 = *n;
  221. for (j = k + 1; j <= i__1; ++j) {
  222. if (vn1[j] != 0.) {
  223. /* NOTE: The following 4 lines follow from the analysis in */
  224. /* Lapack Working Note 176. */
  225. temp = (d__1 = a[rk + j * a_dim1], abs(d__1)) / vn1[j];
  226. /* Computing MAX */
  227. d__1 = 0., d__2 = (temp + 1.) * (1. - temp);
  228. temp = max(d__1,d__2);
  229. /* Computing 2nd power */
  230. d__1 = vn1[j] / vn2[j];
  231. temp2 = temp * (d__1 * d__1);
  232. if (temp2 <= tol3z) {
  233. vn2[j] = (doublereal) lsticc;
  234. lsticc = j;
  235. } else {
  236. vn1[j] *= sqrt(temp);
  237. }
  238. }
  239. /* L30: */
  240. }
  241. }
  242. a[rk + k * a_dim1] = akk;
  243. /* End of while loop. */
  244. goto L10;
  245. }
  246. *kb = k;
  247. rk = *offset + *kb;
  248. /* Apply the block reflector to the rest of the matrix: */
  249. /* A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) - */
  250. /* A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)'. */
  251. /* Computing MIN */
  252. i__1 = *n, i__2 = *m - *offset;
  253. if (*kb < min(i__1,i__2)) {
  254. i__1 = *m - rk;
  255. i__2 = *n - *kb;
  256. _starpu_dgemm_("No transpose", "Transpose", &i__1, &i__2, kb, &c_b8, &a[rk +
  257. 1 + a_dim1], lda, &f[*kb + 1 + f_dim1], ldf, &c_b9, &a[rk + 1
  258. + (*kb + 1) * a_dim1], lda);
  259. }
  260. /* Recomputation of difficult columns. */
  261. L40:
  262. if (lsticc > 0) {
  263. itemp = i_dnnt(&vn2[lsticc]);
  264. i__1 = *m - rk;
  265. vn1[lsticc] = _starpu_dnrm2_(&i__1, &a[rk + 1 + lsticc * a_dim1], &c__1);
  266. /* NOTE: The computation of VN1( LSTICC ) relies on the fact that */
  267. /* SNRM2 does not fail on vectors with norm below the value of */
  268. /* SQRT(DLAMCH('S')) */
  269. vn2[lsticc] = vn1[lsticc];
  270. lsticc = itemp;
  271. goto L40;
  272. }
  273. return 0;
  274. /* End of DLAQPS */
  275. } /* _starpu_dlaqps_ */