dgelsd.c 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694
  1. /* dgelsd.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Table of constant values */
  14. static integer c__6 = 6;
  15. static integer c_n1 = -1;
  16. static integer c__9 = 9;
  17. static integer c__0 = 0;
  18. static integer c__1 = 1;
  19. static doublereal c_b82 = 0.;
  20. /* Subroutine */ int _starpu_dgelsd_(integer *m, integer *n, integer *nrhs,
  21. doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *
  22. s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork,
  23. integer *iwork, integer *info)
  24. {
  25. /* System generated locals */
  26. integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
  27. /* Builtin functions */
  28. double log(doublereal);
  29. /* Local variables */
  30. integer ie, il, mm;
  31. doublereal eps, anrm, bnrm;
  32. integer itau, nlvl, iascl, ibscl;
  33. doublereal sfmin;
  34. integer minmn, maxmn, itaup, itauq, mnthr, nwork;
  35. extern /* Subroutine */ int _starpu_dlabad_(doublereal *, doublereal *), _starpu_dgebrd_(
  36. integer *, integer *, doublereal *, integer *, doublereal *,
  37. doublereal *, doublereal *, doublereal *, doublereal *, integer *,
  38. integer *);
  39. extern doublereal _starpu_dlamch_(char *), _starpu_dlange_(char *, integer *,
  40. integer *, doublereal *, integer *, doublereal *);
  41. extern /* Subroutine */ int _starpu_dgelqf_(integer *, integer *, doublereal *,
  42. integer *, doublereal *, doublereal *, integer *, integer *),
  43. _starpu_dlalsd_(char *, integer *, integer *, integer *, doublereal *,
  44. doublereal *, doublereal *, integer *, doublereal *, integer *,
  45. doublereal *, integer *, integer *), _starpu_dlascl_(char *,
  46. integer *, integer *, doublereal *, doublereal *, integer *,
  47. integer *, doublereal *, integer *, integer *), _starpu_dgeqrf_(
  48. integer *, integer *, doublereal *, integer *, doublereal *,
  49. doublereal *, integer *, integer *), _starpu_dlacpy_(char *, integer *,
  50. integer *, doublereal *, integer *, doublereal *, integer *), _starpu_dlaset_(char *, integer *, integer *, doublereal *,
  51. doublereal *, doublereal *, integer *), _starpu_xerbla_(char *,
  52. integer *);
  53. extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *,
  54. integer *, integer *);
  55. doublereal bignum;
  56. extern /* Subroutine */ int _starpu_dormbr_(char *, char *, char *, integer *,
  57. integer *, integer *, doublereal *, integer *, doublereal *,
  58. doublereal *, integer *, doublereal *, integer *, integer *);
  59. integer wlalsd;
  60. extern /* Subroutine */ int _starpu_dormlq_(char *, char *, integer *, integer *,
  61. integer *, doublereal *, integer *, doublereal *, doublereal *,
  62. integer *, doublereal *, integer *, integer *);
  63. integer ldwork;
  64. extern /* Subroutine */ int _starpu_dormqr_(char *, char *, integer *, integer *,
  65. integer *, doublereal *, integer *, doublereal *, doublereal *,
  66. integer *, doublereal *, integer *, integer *);
  67. integer minwrk, maxwrk;
  68. doublereal smlnum;
  69. logical lquery;
  70. integer smlsiz;
  71. /* -- LAPACK driver routine (version 3.2) -- */
  72. /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  73. /* November 2006 */
  74. /* .. Scalar Arguments .. */
  75. /* .. */
  76. /* .. Array Arguments .. */
  77. /* .. */
  78. /* Purpose */
  79. /* ======= */
  80. /* DGELSD computes the minimum-norm solution to a real linear least */
  81. /* squares problem: */
  82. /* minimize 2-norm(| b - A*x |) */
  83. /* using the singular value decomposition (SVD) of A. A is an M-by-N */
  84. /* matrix which may be rank-deficient. */
  85. /* Several right hand side vectors b and solution vectors x can be */
  86. /* handled in a single call; they are stored as the columns of the */
  87. /* M-by-NRHS right hand side matrix B and the N-by-NRHS solution */
  88. /* matrix X. */
  89. /* The problem is solved in three steps: */
  90. /* (1) Reduce the coefficient matrix A to bidiagonal form with */
  91. /* Householder transformations, reducing the original problem */
  92. /* into a "bidiagonal least squares problem" (BLS) */
  93. /* (2) Solve the BLS using a divide and conquer approach. */
  94. /* (3) Apply back all the Householder tranformations to solve */
  95. /* the original least squares problem. */
  96. /* The effective rank of A is determined by treating as zero those */
  97. /* singular values which are less than RCOND times the largest singular */
  98. /* value. */
  99. /* The divide and conquer algorithm makes very mild assumptions about */
  100. /* floating point arithmetic. It will work on machines with a guard */
  101. /* digit in add/subtract, or on those binary machines without guard */
  102. /* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */
  103. /* Cray-2. It could conceivably fail on hexadecimal or decimal machines */
  104. /* without guard digits, but we know of none. */
  105. /* Arguments */
  106. /* ========= */
  107. /* M (input) INTEGER */
  108. /* The number of rows of A. M >= 0. */
  109. /* N (input) INTEGER */
  110. /* The number of columns of A. N >= 0. */
  111. /* NRHS (input) INTEGER */
  112. /* The number of right hand sides, i.e., the number of columns */
  113. /* of the matrices B and X. NRHS >= 0. */
  114. /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */
  115. /* On entry, the M-by-N matrix A. */
  116. /* On exit, A has been destroyed. */
  117. /* LDA (input) INTEGER */
  118. /* The leading dimension of the array A. LDA >= max(1,M). */
  119. /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */
  120. /* On entry, the M-by-NRHS right hand side matrix B. */
  121. /* On exit, B is overwritten by the N-by-NRHS solution */
  122. /* matrix X. If m >= n and RANK = n, the residual */
  123. /* sum-of-squares for the solution in the i-th column is given */
  124. /* by the sum of squares of elements n+1:m in that column. */
  125. /* LDB (input) INTEGER */
  126. /* The leading dimension of the array B. LDB >= max(1,max(M,N)). */
  127. /* S (output) DOUBLE PRECISION array, dimension (min(M,N)) */
  128. /* The singular values of A in decreasing order. */
  129. /* The condition number of A in the 2-norm = S(1)/S(min(m,n)). */
  130. /* RCOND (input) DOUBLE PRECISION */
  131. /* RCOND is used to determine the effective rank of A. */
  132. /* Singular values S(i) <= RCOND*S(1) are treated as zero. */
  133. /* If RCOND < 0, machine precision is used instead. */
  134. /* RANK (output) INTEGER */
  135. /* The effective rank of A, i.e., the number of singular values */
  136. /* which are greater than RCOND*S(1). */
  137. /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
  138. /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
  139. /* LWORK (input) INTEGER */
  140. /* The dimension of the array WORK. LWORK must be at least 1. */
  141. /* The exact minimum amount of workspace needed depends on M, */
  142. /* N and NRHS. As long as LWORK is at least */
  143. /* 12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2, */
  144. /* if M is greater than or equal to N or */
  145. /* 12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2, */
  146. /* if M is less than N, the code will execute correctly. */
  147. /* SMLSIZ is returned by ILAENV and is equal to the maximum */
  148. /* size of the subproblems at the bottom of the computation */
  149. /* tree (usually about 25), and */
  150. /* NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) */
  151. /* For good performance, LWORK should generally be larger. */
  152. /* If LWORK = -1, then a workspace query is assumed; the routine */
  153. /* only calculates the optimal size of the WORK array, returns */
  154. /* this value as the first entry of the WORK array, and no error */
  155. /* message related to LWORK is issued by XERBLA. */
  156. /* IWORK (workspace) INTEGER array, dimension (MAX(1,LIWORK)) */
  157. /* LIWORK >= 3 * MINMN * NLVL + 11 * MINMN, */
  158. /* where MINMN = MIN( M,N ). */
  159. /* INFO (output) INTEGER */
  160. /* = 0: successful exit */
  161. /* < 0: if INFO = -i, the i-th argument had an illegal value. */
  162. /* > 0: the algorithm for computing the SVD failed to converge; */
  163. /* if INFO = i, i off-diagonal elements of an intermediate */
  164. /* bidiagonal form did not converge to zero. */
  165. /* Further Details */
  166. /* =============== */
  167. /* Based on contributions by */
  168. /* Ming Gu and Ren-Cang Li, Computer Science Division, University of */
  169. /* California at Berkeley, USA */
  170. /* Osni Marques, LBNL/NERSC, USA */
  171. /* ===================================================================== */
  172. /* .. Parameters .. */
  173. /* .. */
  174. /* .. Local Scalars .. */
  175. /* .. */
  176. /* .. External Subroutines .. */
  177. /* .. */
  178. /* .. External Functions .. */
  179. /* .. */
  180. /* .. Intrinsic Functions .. */
  181. /* .. */
  182. /* .. Executable Statements .. */
  183. /* Test the input arguments. */
  184. /* Parameter adjustments */
  185. a_dim1 = *lda;
  186. a_offset = 1 + a_dim1;
  187. a -= a_offset;
  188. b_dim1 = *ldb;
  189. b_offset = 1 + b_dim1;
  190. b -= b_offset;
  191. --s;
  192. --work;
  193. --iwork;
  194. /* Function Body */
  195. *info = 0;
  196. minmn = min(*m,*n);
  197. maxmn = max(*m,*n);
  198. mnthr = _starpu_ilaenv_(&c__6, "DGELSD", " ", m, n, nrhs, &c_n1);
  199. lquery = *lwork == -1;
  200. if (*m < 0) {
  201. *info = -1;
  202. } else if (*n < 0) {
  203. *info = -2;
  204. } else if (*nrhs < 0) {
  205. *info = -3;
  206. } else if (*lda < max(1,*m)) {
  207. *info = -5;
  208. } else if (*ldb < max(1,maxmn)) {
  209. *info = -7;
  210. }
  211. smlsiz = _starpu_ilaenv_(&c__9, "DGELSD", " ", &c__0, &c__0, &c__0, &c__0);
  212. /* Compute workspace. */
  213. /* (Note: Comments in the code beginning "Workspace:" describe the */
  214. /* minimal amount of workspace needed at that point in the code, */
  215. /* as well as the preferred amount for good performance. */
  216. /* NB refers to the optimal block size for the immediately */
  217. /* following subroutine, as returned by ILAENV.) */
  218. minwrk = 1;
  219. minmn = max(1,minmn);
  220. /* Computing MAX */
  221. i__1 = (integer) (log((doublereal) minmn / (doublereal) (smlsiz + 1)) /
  222. log(2.)) + 1;
  223. nlvl = max(i__1,0);
  224. if (*info == 0) {
  225. maxwrk = 0;
  226. mm = *m;
  227. if (*m >= *n && *m >= mnthr) {
  228. /* Path 1a - overdetermined, with many more rows than columns. */
  229. mm = *n;
  230. /* Computing MAX */
  231. i__1 = maxwrk, i__2 = *n + *n * _starpu_ilaenv_(&c__1, "DGEQRF", " ", m,
  232. n, &c_n1, &c_n1);
  233. maxwrk = max(i__1,i__2);
  234. /* Computing MAX */
  235. i__1 = maxwrk, i__2 = *n + *nrhs * _starpu_ilaenv_(&c__1, "DORMQR", "LT",
  236. m, nrhs, n, &c_n1);
  237. maxwrk = max(i__1,i__2);
  238. }
  239. if (*m >= *n) {
  240. /* Path 1 - overdetermined or exactly determined. */
  241. /* Computing MAX */
  242. i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * _starpu_ilaenv_(&c__1, "DGEBRD"
  243. , " ", &mm, n, &c_n1, &c_n1);
  244. maxwrk = max(i__1,i__2);
  245. /* Computing MAX */
  246. i__1 = maxwrk, i__2 = *n * 3 + *nrhs * _starpu_ilaenv_(&c__1, "DORMBR",
  247. "QLT", &mm, nrhs, n, &c_n1);
  248. maxwrk = max(i__1,i__2);
  249. /* Computing MAX */
  250. i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * _starpu_ilaenv_(&c__1, "DORMBR",
  251. "PLN", n, nrhs, n, &c_n1);
  252. maxwrk = max(i__1,i__2);
  253. /* Computing 2nd power */
  254. i__1 = smlsiz + 1;
  255. wlalsd = *n * 9 + (*n << 1) * smlsiz + (*n << 3) * nlvl + *n * *
  256. nrhs + i__1 * i__1;
  257. /* Computing MAX */
  258. i__1 = maxwrk, i__2 = *n * 3 + wlalsd;
  259. maxwrk = max(i__1,i__2);
  260. /* Computing MAX */
  261. i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1,i__2),
  262. i__2 = *n * 3 + wlalsd;
  263. minwrk = max(i__1,i__2);
  264. }
  265. if (*n > *m) {
  266. /* Computing 2nd power */
  267. i__1 = smlsiz + 1;
  268. wlalsd = *m * 9 + (*m << 1) * smlsiz + (*m << 3) * nlvl + *m * *
  269. nrhs + i__1 * i__1;
  270. if (*n >= mnthr) {
  271. /* Path 2a - underdetermined, with many more columns */
  272. /* than rows. */
  273. maxwrk = *m + *m * _starpu_ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1,
  274. &c_n1);
  275. /* Computing MAX */
  276. i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) *
  277. _starpu_ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1);
  278. maxwrk = max(i__1,i__2);
  279. /* Computing MAX */
  280. i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * _starpu_ilaenv_(&
  281. c__1, "DORMBR", "QLT", m, nrhs, m, &c_n1);
  282. maxwrk = max(i__1,i__2);
  283. /* Computing MAX */
  284. i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) *
  285. _starpu_ilaenv_(&c__1, "DORMBR", "PLN", m, nrhs, m, &c_n1);
  286. maxwrk = max(i__1,i__2);
  287. if (*nrhs > 1) {
  288. /* Computing MAX */
  289. i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs;
  290. maxwrk = max(i__1,i__2);
  291. } else {
  292. /* Computing MAX */
  293. i__1 = maxwrk, i__2 = *m * *m + (*m << 1);
  294. maxwrk = max(i__1,i__2);
  295. }
  296. /* Computing MAX */
  297. i__1 = maxwrk, i__2 = *m + *nrhs * _starpu_ilaenv_(&c__1, "DORMLQ",
  298. "LT", n, nrhs, m, &c_n1);
  299. maxwrk = max(i__1,i__2);
  300. /* Computing MAX */
  301. i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + wlalsd;
  302. maxwrk = max(i__1,i__2);
  303. /* XXX: Ensure the Path 2a case below is triggered. The workspace */
  304. /* calculation should use queries for all routines eventually. */
  305. /* Computing MAX */
  306. /* Computing MAX */
  307. i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 =
  308. max(i__3,*nrhs), i__4 = *n - *m * 3;
  309. i__1 = maxwrk, i__2 = (*m << 2) + *m * *m + max(i__3,i__4);
  310. maxwrk = max(i__1,i__2);
  311. } else {
  312. /* Path 2 - remaining underdetermined cases. */
  313. maxwrk = *m * 3 + (*n + *m) * _starpu_ilaenv_(&c__1, "DGEBRD", " ", m,
  314. n, &c_n1, &c_n1);
  315. /* Computing MAX */
  316. i__1 = maxwrk, i__2 = *m * 3 + *nrhs * _starpu_ilaenv_(&c__1, "DORMBR"
  317. , "QLT", m, nrhs, n, &c_n1);
  318. maxwrk = max(i__1,i__2);
  319. /* Computing MAX */
  320. i__1 = maxwrk, i__2 = *m * 3 + *m * _starpu_ilaenv_(&c__1, "DORMBR",
  321. "PLN", n, nrhs, m, &c_n1);
  322. maxwrk = max(i__1,i__2);
  323. /* Computing MAX */
  324. i__1 = maxwrk, i__2 = *m * 3 + wlalsd;
  325. maxwrk = max(i__1,i__2);
  326. }
  327. /* Computing MAX */
  328. i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *m, i__1 = max(i__1,i__2),
  329. i__2 = *m * 3 + wlalsd;
  330. minwrk = max(i__1,i__2);
  331. }
  332. minwrk = min(minwrk,maxwrk);
  333. work[1] = (doublereal) maxwrk;
  334. if (*lwork < minwrk && ! lquery) {
  335. *info = -12;
  336. }
  337. }
  338. if (*info != 0) {
  339. i__1 = -(*info);
  340. _starpu_xerbla_("DGELSD", &i__1);
  341. return 0;
  342. } else if (lquery) {
  343. goto L10;
  344. }
  345. /* Quick return if possible. */
  346. if (*m == 0 || *n == 0) {
  347. *rank = 0;
  348. return 0;
  349. }
  350. /* Get machine parameters. */
  351. eps = _starpu_dlamch_("P");
  352. sfmin = _starpu_dlamch_("S");
  353. smlnum = sfmin / eps;
  354. bignum = 1. / smlnum;
  355. _starpu_dlabad_(&smlnum, &bignum);
  356. /* Scale A if max entry outside range [SMLNUM,BIGNUM]. */
  357. anrm = _starpu_dlange_("M", m, n, &a[a_offset], lda, &work[1]);
  358. iascl = 0;
  359. if (anrm > 0. && anrm < smlnum) {
  360. /* Scale matrix norm up to SMLNUM. */
  361. _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda,
  362. info);
  363. iascl = 1;
  364. } else if (anrm > bignum) {
  365. /* Scale matrix norm down to BIGNUM. */
  366. _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda,
  367. info);
  368. iascl = 2;
  369. } else if (anrm == 0.) {
  370. /* Matrix all zero. Return zero solution. */
  371. i__1 = max(*m,*n);
  372. _starpu_dlaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b[b_offset], ldb);
  373. _starpu_dlaset_("F", &minmn, &c__1, &c_b82, &c_b82, &s[1], &c__1);
  374. *rank = 0;
  375. goto L10;
  376. }
  377. /* Scale B if max entry outside range [SMLNUM,BIGNUM]. */
  378. bnrm = _starpu_dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]);
  379. ibscl = 0;
  380. if (bnrm > 0. && bnrm < smlnum) {
  381. /* Scale matrix norm up to SMLNUM. */
  382. _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb,
  383. info);
  384. ibscl = 1;
  385. } else if (bnrm > bignum) {
  386. /* Scale matrix norm down to BIGNUM. */
  387. _starpu_dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb,
  388. info);
  389. ibscl = 2;
  390. }
  391. /* If M < N make sure certain entries of B are zero. */
  392. if (*m < *n) {
  393. i__1 = *n - *m;
  394. _starpu_dlaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b[*m + 1 + b_dim1], ldb);
  395. }
  396. /* Overdetermined case. */
  397. if (*m >= *n) {
  398. /* Path 1 - overdetermined or exactly determined. */
  399. mm = *m;
  400. if (*m >= mnthr) {
  401. /* Path 1a - overdetermined, with many more rows than columns. */
  402. mm = *n;
  403. itau = 1;
  404. nwork = itau + *n;
  405. /* Compute A=Q*R. */
  406. /* (Workspace: need 2*N, prefer N+N*NB) */
  407. i__1 = *lwork - nwork + 1;
  408. _starpu_dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1,
  409. info);
  410. /* Multiply B by transpose(Q). */
  411. /* (Workspace: need N+NRHS, prefer N+NRHS*NB) */
  412. i__1 = *lwork - nwork + 1;
  413. _starpu_dormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[
  414. b_offset], ldb, &work[nwork], &i__1, info);
  415. /* Zero out below R. */
  416. if (*n > 1) {
  417. i__1 = *n - 1;
  418. i__2 = *n - 1;
  419. _starpu_dlaset_("L", &i__1, &i__2, &c_b82, &c_b82, &a[a_dim1 + 2],
  420. lda);
  421. }
  422. }
  423. ie = 1;
  424. itauq = ie + *n;
  425. itaup = itauq + *n;
  426. nwork = itaup + *n;
  427. /* Bidiagonalize R in A. */
  428. /* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) */
  429. i__1 = *lwork - nwork + 1;
  430. _starpu_dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
  431. work[itaup], &work[nwork], &i__1, info);
  432. /* Multiply B by transpose of left bidiagonalizing vectors of R. */
  433. /* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) */
  434. i__1 = *lwork - nwork + 1;
  435. _starpu_dormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq],
  436. &b[b_offset], ldb, &work[nwork], &i__1, info);
  437. /* Solve the bidiagonal least squares problem. */
  438. _starpu_dlalsd_("U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb,
  439. rcond, rank, &work[nwork], &iwork[1], info);
  440. if (*info != 0) {
  441. goto L10;
  442. }
  443. /* Multiply B by right bidiagonalizing vectors of R. */
  444. i__1 = *lwork - nwork + 1;
  445. _starpu_dormbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], &
  446. b[b_offset], ldb, &work[nwork], &i__1, info);
  447. } else /* if(complicated condition) */ {
  448. /* Computing MAX */
  449. i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1,i__2), i__1 = max(
  450. i__1,*nrhs), i__2 = *n - *m * 3, i__1 = max(i__1,i__2);
  451. if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1,wlalsd)) {
  452. /* Path 2a - underdetermined, with many more columns than rows */
  453. /* and sufficient workspace for an efficient algorithm. */
  454. ldwork = *m;
  455. /* Computing MAX */
  456. /* Computing MAX */
  457. i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 =
  458. max(i__3,*nrhs), i__4 = *n - *m * 3;
  459. i__1 = (*m << 2) + *m * *lda + max(i__3,i__4), i__2 = *m * *lda +
  460. *m + *m * *nrhs, i__1 = max(i__1,i__2), i__2 = (*m << 2)
  461. + *m * *lda + wlalsd;
  462. if (*lwork >= max(i__1,i__2)) {
  463. ldwork = *lda;
  464. }
  465. itau = 1;
  466. nwork = *m + 1;
  467. /* Compute A=L*Q. */
  468. /* (Workspace: need 2*M, prefer M+M*NB) */
  469. i__1 = *lwork - nwork + 1;
  470. _starpu_dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1,
  471. info);
  472. il = nwork;
  473. /* Copy L to WORK(IL), zeroing out above its diagonal. */
  474. _starpu_dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork);
  475. i__1 = *m - 1;
  476. i__2 = *m - 1;
  477. _starpu_dlaset_("U", &i__1, &i__2, &c_b82, &c_b82, &work[il + ldwork], &
  478. ldwork);
  479. ie = il + ldwork * *m;
  480. itauq = ie + *m;
  481. itaup = itauq + *m;
  482. nwork = itaup + *m;
  483. /* Bidiagonalize L in WORK(IL). */
  484. /* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) */
  485. i__1 = *lwork - nwork + 1;
  486. _starpu_dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq],
  487. &work[itaup], &work[nwork], &i__1, info);
  488. /* Multiply B by transpose of left bidiagonalizing vectors of L. */
  489. /* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) */
  490. i__1 = *lwork - nwork + 1;
  491. _starpu_dormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[
  492. itauq], &b[b_offset], ldb, &work[nwork], &i__1, info);
  493. /* Solve the bidiagonal least squares problem. */
  494. _starpu_dlalsd_("U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset],
  495. ldb, rcond, rank, &work[nwork], &iwork[1], info);
  496. if (*info != 0) {
  497. goto L10;
  498. }
  499. /* Multiply B by right bidiagonalizing vectors of L. */
  500. i__1 = *lwork - nwork + 1;
  501. _starpu_dormbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[
  502. itaup], &b[b_offset], ldb, &work[nwork], &i__1, info);
  503. /* Zero out below first M rows of B. */
  504. i__1 = *n - *m;
  505. _starpu_dlaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b[*m + 1 + b_dim1],
  506. ldb);
  507. nwork = itau + *m;
  508. /* Multiply transpose(Q) by B. */
  509. /* (Workspace: need M+NRHS, prefer M+NRHS*NB) */
  510. i__1 = *lwork - nwork + 1;
  511. _starpu_dormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[
  512. b_offset], ldb, &work[nwork], &i__1, info);
  513. } else {
  514. /* Path 2 - remaining underdetermined cases. */
  515. ie = 1;
  516. itauq = ie + *m;
  517. itaup = itauq + *m;
  518. nwork = itaup + *m;
  519. /* Bidiagonalize A. */
  520. /* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */
  521. i__1 = *lwork - nwork + 1;
  522. _starpu_dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
  523. work[itaup], &work[nwork], &i__1, info);
  524. /* Multiply B by transpose of left bidiagonalizing vectors. */
  525. /* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) */
  526. i__1 = *lwork - nwork + 1;
  527. _starpu_dormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq]
  528. , &b[b_offset], ldb, &work[nwork], &i__1, info);
  529. /* Solve the bidiagonal least squares problem. */
  530. _starpu_dlalsd_("L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset],
  531. ldb, rcond, rank, &work[nwork], &iwork[1], info);
  532. if (*info != 0) {
  533. goto L10;
  534. }
  535. /* Multiply B by right bidiagonalizing vectors of A. */
  536. i__1 = *lwork - nwork + 1;
  537. _starpu_dormbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup]
  538. , &b[b_offset], ldb, &work[nwork], &i__1, info);
  539. }
  540. }
  541. /* Undo scaling. */
  542. if (iascl == 1) {
  543. _starpu_dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb,
  544. info);
  545. _starpu_dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
  546. minmn, info);
  547. } else if (iascl == 2) {
  548. _starpu_dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb,
  549. info);
  550. _starpu_dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
  551. minmn, info);
  552. }
  553. if (ibscl == 1) {
  554. _starpu_dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb,
  555. info);
  556. } else if (ibscl == 2) {
  557. _starpu_dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb,
  558. info);
  559. }
  560. L10:
  561. work[1] = (doublereal) maxwrk;
  562. return 0;
  563. /* End of DGELSD */
  564. } /* _starpu_dgelsd_ */