dbdsqr.c 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919
  1. /* dbdsqr.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Table of constant values */
  14. static doublereal c_b15 = -.125;
  15. static integer c__1 = 1;
  16. static doublereal c_b49 = 1.;
  17. static doublereal c_b72 = -1.;
  18. /* Subroutine */ int _starpu_dbdsqr_(char *uplo, integer *n, integer *ncvt, integer *
  19. nru, integer *ncc, doublereal *d__, doublereal *e, doublereal *vt,
  20. integer *ldvt, doublereal *u, integer *ldu, doublereal *c__, integer *
  21. ldc, doublereal *work, integer *info)
  22. {
  23. /* System generated locals */
  24. integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1,
  25. i__2;
  26. doublereal d__1, d__2, d__3, d__4;
  27. /* Builtin functions */
  28. double pow_dd(doublereal *, doublereal *), sqrt(doublereal), d_sign(
  29. doublereal *, doublereal *);
  30. /* Local variables */
  31. doublereal f, g, h__;
  32. integer i__, j, m;
  33. doublereal r__, cs;
  34. integer ll;
  35. doublereal sn, mu;
  36. integer nm1, nm12, nm13, lll;
  37. doublereal eps, sll, tol, abse;
  38. integer idir;
  39. doublereal abss;
  40. integer oldm;
  41. doublereal cosl;
  42. integer isub, iter;
  43. doublereal unfl, sinl, cosr, smin, smax, sinr;
  44. extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *,
  45. doublereal *, integer *, doublereal *, doublereal *), _starpu_dlas2_(
  46. doublereal *, doublereal *, doublereal *, doublereal *,
  47. doublereal *), _starpu_dscal_(integer *, doublereal *, doublereal *,
  48. integer *);
  49. extern logical _starpu_lsame_(char *, char *);
  50. doublereal oldcs;
  51. extern /* Subroutine */ int _starpu_dlasr_(char *, char *, char *, integer *,
  52. integer *, doublereal *, doublereal *, doublereal *, integer *);
  53. integer oldll;
  54. doublereal shift, sigmn, oldsn;
  55. extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *,
  56. doublereal *, integer *);
  57. integer maxit;
  58. doublereal sminl, sigmx;
  59. logical lower;
  60. extern /* Subroutine */ int _starpu_dlasq1_(integer *, doublereal *, doublereal *,
  61. doublereal *, integer *), _starpu_dlasv2_(doublereal *, doublereal *,
  62. doublereal *, doublereal *, doublereal *, doublereal *,
  63. doublereal *, doublereal *, doublereal *);
  64. extern doublereal _starpu_dlamch_(char *);
  65. extern /* Subroutine */ int _starpu_dlartg_(doublereal *, doublereal *,
  66. doublereal *, doublereal *, doublereal *), _starpu_xerbla_(char *,
  67. integer *);
  68. doublereal sminoa, thresh;
  69. logical rotate;
  70. doublereal tolmul;
  71. /* -- LAPACK routine (version 3.2) -- */
  72. /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  73. /* January 2007 */
  74. /* .. Scalar Arguments .. */
  75. /* .. */
  76. /* .. Array Arguments .. */
  77. /* .. */
  78. /* Purpose */
  79. /* ======= */
  80. /* DBDSQR computes the singular values and, optionally, the right and/or */
  81. /* left singular vectors from the singular value decomposition (SVD) of */
  82. /* a real N-by-N (upper or lower) bidiagonal matrix B using the implicit */
  83. /* zero-shift QR algorithm. The SVD of B has the form */
  84. /* B = Q * S * P**T */
  85. /* where S is the diagonal matrix of singular values, Q is an orthogonal */
  86. /* matrix of left singular vectors, and P is an orthogonal matrix of */
  87. /* right singular vectors. If left singular vectors are requested, this */
  88. /* subroutine actually returns U*Q instead of Q, and, if right singular */
  89. /* vectors are requested, this subroutine returns P**T*VT instead of */
  90. /* P**T, for given real input matrices U and VT. When U and VT are the */
  91. /* orthogonal matrices that reduce a general matrix A to bidiagonal */
  92. /* form: A = U*B*VT, as computed by DGEBRD, then */
  93. /* A = (U*Q) * S * (P**T*VT) */
  94. /* is the SVD of A. Optionally, the subroutine may also compute Q**T*C */
  95. /* for a given real input matrix C. */
  96. /* See "Computing Small Singular Values of Bidiagonal Matrices With */
  97. /* Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, */
  98. /* LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, */
  99. /* no. 5, pp. 873-912, Sept 1990) and */
  100. /* "Accurate singular values and differential qd algorithms," by */
  101. /* B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics */
  102. /* Department, University of California at Berkeley, July 1992 */
  103. /* for a detailed description of the algorithm. */
  104. /* Arguments */
  105. /* ========= */
  106. /* UPLO (input) CHARACTER*1 */
  107. /* = 'U': B is upper bidiagonal; */
  108. /* = 'L': B is lower bidiagonal. */
  109. /* N (input) INTEGER */
  110. /* The order of the matrix B. N >= 0. */
  111. /* NCVT (input) INTEGER */
  112. /* The number of columns of the matrix VT. NCVT >= 0. */
  113. /* NRU (input) INTEGER */
  114. /* The number of rows of the matrix U. NRU >= 0. */
  115. /* NCC (input) INTEGER */
  116. /* The number of columns of the matrix C. NCC >= 0. */
  117. /* D (input/output) DOUBLE PRECISION array, dimension (N) */
  118. /* On entry, the n diagonal elements of the bidiagonal matrix B. */
  119. /* On exit, if INFO=0, the singular values of B in decreasing */
  120. /* order. */
  121. /* E (input/output) DOUBLE PRECISION array, dimension (N-1) */
  122. /* On entry, the N-1 offdiagonal elements of the bidiagonal */
  123. /* matrix B. */
  124. /* On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E */
  125. /* will contain the diagonal and superdiagonal elements of a */
  126. /* bidiagonal matrix orthogonally equivalent to the one given */
  127. /* as input. */
  128. /* VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) */
  129. /* On entry, an N-by-NCVT matrix VT. */
  130. /* On exit, VT is overwritten by P**T * VT. */
  131. /* Not referenced if NCVT = 0. */
  132. /* LDVT (input) INTEGER */
  133. /* The leading dimension of the array VT. */
  134. /* LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. */
  135. /* U (input/output) DOUBLE PRECISION array, dimension (LDU, N) */
  136. /* On entry, an NRU-by-N matrix U. */
  137. /* On exit, U is overwritten by U * Q. */
  138. /* Not referenced if NRU = 0. */
  139. /* LDU (input) INTEGER */
  140. /* The leading dimension of the array U. LDU >= max(1,NRU). */
  141. /* C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) */
  142. /* On entry, an N-by-NCC matrix C. */
  143. /* On exit, C is overwritten by Q**T * C. */
  144. /* Not referenced if NCC = 0. */
  145. /* LDC (input) INTEGER */
  146. /* The leading dimension of the array C. */
  147. /* LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. */
  148. /* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */
  149. /* INFO (output) INTEGER */
  150. /* = 0: successful exit */
  151. /* < 0: If INFO = -i, the i-th argument had an illegal value */
  152. /* > 0: */
  153. /* if NCVT = NRU = NCC = 0, */
  154. /* = 1, a split was marked by a positive value in E */
  155. /* = 2, current block of Z not diagonalized after 30*N */
  156. /* iterations (in inner while loop) */
  157. /* = 3, termination criterion of outer while loop not met */
  158. /* (program created more than N unreduced blocks) */
  159. /* else NCVT = NRU = NCC = 0, */
  160. /* the algorithm did not converge; D and E contain the */
  161. /* elements of a bidiagonal matrix which is orthogonally */
  162. /* similar to the input matrix B; if INFO = i, i */
  163. /* elements of E have not converged to zero. */
  164. /* Internal Parameters */
  165. /* =================== */
  166. /* TOLMUL DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8))) */
  167. /* TOLMUL controls the convergence criterion of the QR loop. */
  168. /* If it is positive, TOLMUL*EPS is the desired relative */
  169. /* precision in the computed singular values. */
  170. /* If it is negative, abs(TOLMUL*EPS*sigma_max) is the */
  171. /* desired absolute accuracy in the computed singular */
  172. /* values (corresponds to relative accuracy */
  173. /* abs(TOLMUL*EPS) in the largest singular value. */
  174. /* abs(TOLMUL) should be between 1 and 1/EPS, and preferably */
  175. /* between 10 (for fast convergence) and .1/EPS */
  176. /* (for there to be some accuracy in the results). */
  177. /* Default is to lose at either one eighth or 2 of the */
  178. /* available decimal digits in each computed singular value */
  179. /* (whichever is smaller). */
  180. /* MAXITR INTEGER, default = 6 */
  181. /* MAXITR controls the maximum number of passes of the */
  182. /* algorithm through its inner loop. The algorithms stops */
  183. /* (and so fails to converge) if the number of passes */
  184. /* through the inner loop exceeds MAXITR*N**2. */
  185. /* ===================================================================== */
  186. /* .. Parameters .. */
  187. /* .. */
  188. /* .. Local Scalars .. */
  189. /* .. */
  190. /* .. External Functions .. */
  191. /* .. */
  192. /* .. External Subroutines .. */
  193. /* .. */
  194. /* .. Intrinsic Functions .. */
  195. /* .. */
  196. /* .. Executable Statements .. */
  197. /* Test the input parameters. */
  198. /* Parameter adjustments */
  199. --d__;
  200. --e;
  201. vt_dim1 = *ldvt;
  202. vt_offset = 1 + vt_dim1;
  203. vt -= vt_offset;
  204. u_dim1 = *ldu;
  205. u_offset = 1 + u_dim1;
  206. u -= u_offset;
  207. c_dim1 = *ldc;
  208. c_offset = 1 + c_dim1;
  209. c__ -= c_offset;
  210. --work;
  211. /* Function Body */
  212. *info = 0;
  213. lower = _starpu_lsame_(uplo, "L");
  214. if (! _starpu_lsame_(uplo, "U") && ! lower) {
  215. *info = -1;
  216. } else if (*n < 0) {
  217. *info = -2;
  218. } else if (*ncvt < 0) {
  219. *info = -3;
  220. } else if (*nru < 0) {
  221. *info = -4;
  222. } else if (*ncc < 0) {
  223. *info = -5;
  224. } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) {
  225. *info = -9;
  226. } else if (*ldu < max(1,*nru)) {
  227. *info = -11;
  228. } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) {
  229. *info = -13;
  230. }
  231. if (*info != 0) {
  232. i__1 = -(*info);
  233. _starpu_xerbla_("DBDSQR", &i__1);
  234. return 0;
  235. }
  236. if (*n == 0) {
  237. return 0;
  238. }
  239. if (*n == 1) {
  240. goto L160;
  241. }
  242. /* ROTATE is true if any singular vectors desired, false otherwise */
  243. rotate = *ncvt > 0 || *nru > 0 || *ncc > 0;
  244. /* If no singular vectors desired, use qd algorithm */
  245. if (! rotate) {
  246. _starpu_dlasq1_(n, &d__[1], &e[1], &work[1], info);
  247. return 0;
  248. }
  249. nm1 = *n - 1;
  250. nm12 = nm1 + nm1;
  251. nm13 = nm12 + nm1;
  252. idir = 0;
  253. /* Get machine constants */
  254. eps = _starpu_dlamch_("Epsilon");
  255. unfl = _starpu_dlamch_("Safe minimum");
  256. /* If matrix lower bidiagonal, rotate to be upper bidiagonal */
  257. /* by applying Givens rotations on the left */
  258. if (lower) {
  259. i__1 = *n - 1;
  260. for (i__ = 1; i__ <= i__1; ++i__) {
  261. _starpu_dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
  262. d__[i__] = r__;
  263. e[i__] = sn * d__[i__ + 1];
  264. d__[i__ + 1] = cs * d__[i__ + 1];
  265. work[i__] = cs;
  266. work[nm1 + i__] = sn;
  267. /* L10: */
  268. }
  269. /* Update singular vectors if desired */
  270. if (*nru > 0) {
  271. _starpu_dlasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset],
  272. ldu);
  273. }
  274. if (*ncc > 0) {
  275. _starpu_dlasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset],
  276. ldc);
  277. }
  278. }
  279. /* Compute singular values to relative accuracy TOL */
  280. /* (By setting TOL to be negative, algorithm will compute */
  281. /* singular values to absolute accuracy ABS(TOL)*norm(input matrix)) */
  282. /* Computing MAX */
  283. /* Computing MIN */
  284. d__3 = 100., d__4 = pow_dd(&eps, &c_b15);
  285. d__1 = 10., d__2 = min(d__3,d__4);
  286. tolmul = max(d__1,d__2);
  287. tol = tolmul * eps;
  288. /* Compute approximate maximum, minimum singular values */
  289. smax = 0.;
  290. i__1 = *n;
  291. for (i__ = 1; i__ <= i__1; ++i__) {
  292. /* Computing MAX */
  293. d__2 = smax, d__3 = (d__1 = d__[i__], abs(d__1));
  294. smax = max(d__2,d__3);
  295. /* L20: */
  296. }
  297. i__1 = *n - 1;
  298. for (i__ = 1; i__ <= i__1; ++i__) {
  299. /* Computing MAX */
  300. d__2 = smax, d__3 = (d__1 = e[i__], abs(d__1));
  301. smax = max(d__2,d__3);
  302. /* L30: */
  303. }
  304. sminl = 0.;
  305. if (tol >= 0.) {
  306. /* Relative accuracy desired */
  307. sminoa = abs(d__[1]);
  308. if (sminoa == 0.) {
  309. goto L50;
  310. }
  311. mu = sminoa;
  312. i__1 = *n;
  313. for (i__ = 2; i__ <= i__1; ++i__) {
  314. mu = (d__2 = d__[i__], abs(d__2)) * (mu / (mu + (d__1 = e[i__ - 1]
  315. , abs(d__1))));
  316. sminoa = min(sminoa,mu);
  317. if (sminoa == 0.) {
  318. goto L50;
  319. }
  320. /* L40: */
  321. }
  322. L50:
  323. sminoa /= sqrt((doublereal) (*n));
  324. /* Computing MAX */
  325. d__1 = tol * sminoa, d__2 = *n * 6 * *n * unfl;
  326. thresh = max(d__1,d__2);
  327. } else {
  328. /* Absolute accuracy desired */
  329. /* Computing MAX */
  330. d__1 = abs(tol) * smax, d__2 = *n * 6 * *n * unfl;
  331. thresh = max(d__1,d__2);
  332. }
  333. /* Prepare for main iteration loop for the singular values */
  334. /* (MAXIT is the maximum number of passes through the inner */
  335. /* loop permitted before nonconvergence signalled.) */
  336. maxit = *n * 6 * *n;
  337. iter = 0;
  338. oldll = -1;
  339. oldm = -1;
  340. /* M points to last element of unconverged part of matrix */
  341. m = *n;
  342. /* Begin main iteration loop */
  343. L60:
  344. /* Check for convergence or exceeding iteration count */
  345. if (m <= 1) {
  346. goto L160;
  347. }
  348. if (iter > maxit) {
  349. goto L200;
  350. }
  351. /* Find diagonal block of matrix to work on */
  352. if (tol < 0. && (d__1 = d__[m], abs(d__1)) <= thresh) {
  353. d__[m] = 0.;
  354. }
  355. smax = (d__1 = d__[m], abs(d__1));
  356. smin = smax;
  357. i__1 = m - 1;
  358. for (lll = 1; lll <= i__1; ++lll) {
  359. ll = m - lll;
  360. abss = (d__1 = d__[ll], abs(d__1));
  361. abse = (d__1 = e[ll], abs(d__1));
  362. if (tol < 0. && abss <= thresh) {
  363. d__[ll] = 0.;
  364. }
  365. if (abse <= thresh) {
  366. goto L80;
  367. }
  368. smin = min(smin,abss);
  369. /* Computing MAX */
  370. d__1 = max(smax,abss);
  371. smax = max(d__1,abse);
  372. /* L70: */
  373. }
  374. ll = 0;
  375. goto L90;
  376. L80:
  377. e[ll] = 0.;
  378. /* Matrix splits since E(LL) = 0 */
  379. if (ll == m - 1) {
  380. /* Convergence of bottom singular value, return to top of loop */
  381. --m;
  382. goto L60;
  383. }
  384. L90:
  385. ++ll;
  386. /* E(LL) through E(M-1) are nonzero, E(LL-1) is zero */
  387. if (ll == m - 1) {
  388. /* 2 by 2 block, handle separately */
  389. _starpu_dlasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr,
  390. &sinl, &cosl);
  391. d__[m - 1] = sigmx;
  392. e[m - 1] = 0.;
  393. d__[m] = sigmn;
  394. /* Compute singular vectors, if desired */
  395. if (*ncvt > 0) {
  396. _starpu_drot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, &
  397. cosr, &sinr);
  398. }
  399. if (*nru > 0) {
  400. _starpu_drot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], &
  401. c__1, &cosl, &sinl);
  402. }
  403. if (*ncc > 0) {
  404. _starpu_drot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, &
  405. cosl, &sinl);
  406. }
  407. m += -2;
  408. goto L60;
  409. }
  410. /* If working on new submatrix, choose shift direction */
  411. /* (from larger end diagonal element towards smaller) */
  412. if (ll > oldm || m < oldll) {
  413. if ((d__1 = d__[ll], abs(d__1)) >= (d__2 = d__[m], abs(d__2))) {
  414. /* Chase bulge from top (big end) to bottom (small end) */
  415. idir = 1;
  416. } else {
  417. /* Chase bulge from bottom (big end) to top (small end) */
  418. idir = 2;
  419. }
  420. }
  421. /* Apply convergence tests */
  422. if (idir == 1) {
  423. /* Run convergence test in forward direction */
  424. /* First apply standard test to bottom of matrix */
  425. if ((d__2 = e[m - 1], abs(d__2)) <= abs(tol) * (d__1 = d__[m], abs(
  426. d__1)) || tol < 0. && (d__3 = e[m - 1], abs(d__3)) <= thresh)
  427. {
  428. e[m - 1] = 0.;
  429. goto L60;
  430. }
  431. if (tol >= 0.) {
  432. /* If relative accuracy desired, */
  433. /* apply convergence criterion forward */
  434. mu = (d__1 = d__[ll], abs(d__1));
  435. sminl = mu;
  436. i__1 = m - 1;
  437. for (lll = ll; lll <= i__1; ++lll) {
  438. if ((d__1 = e[lll], abs(d__1)) <= tol * mu) {
  439. e[lll] = 0.;
  440. goto L60;
  441. }
  442. mu = (d__2 = d__[lll + 1], abs(d__2)) * (mu / (mu + (d__1 = e[
  443. lll], abs(d__1))));
  444. sminl = min(sminl,mu);
  445. /* L100: */
  446. }
  447. }
  448. } else {
  449. /* Run convergence test in backward direction */
  450. /* First apply standard test to top of matrix */
  451. if ((d__2 = e[ll], abs(d__2)) <= abs(tol) * (d__1 = d__[ll], abs(d__1)
  452. ) || tol < 0. && (d__3 = e[ll], abs(d__3)) <= thresh) {
  453. e[ll] = 0.;
  454. goto L60;
  455. }
  456. if (tol >= 0.) {
  457. /* If relative accuracy desired, */
  458. /* apply convergence criterion backward */
  459. mu = (d__1 = d__[m], abs(d__1));
  460. sminl = mu;
  461. i__1 = ll;
  462. for (lll = m - 1; lll >= i__1; --lll) {
  463. if ((d__1 = e[lll], abs(d__1)) <= tol * mu) {
  464. e[lll] = 0.;
  465. goto L60;
  466. }
  467. mu = (d__2 = d__[lll], abs(d__2)) * (mu / (mu + (d__1 = e[lll]
  468. , abs(d__1))));
  469. sminl = min(sminl,mu);
  470. /* L110: */
  471. }
  472. }
  473. }
  474. oldll = ll;
  475. oldm = m;
  476. /* Compute shift. First, test if shifting would ruin relative */
  477. /* accuracy, and if so set the shift to zero. */
  478. /* Computing MAX */
  479. d__1 = eps, d__2 = tol * .01;
  480. if (tol >= 0. && *n * tol * (sminl / smax) <= max(d__1,d__2)) {
  481. /* Use a zero shift to avoid loss of relative accuracy */
  482. shift = 0.;
  483. } else {
  484. /* Compute the shift from 2-by-2 block at end of matrix */
  485. if (idir == 1) {
  486. sll = (d__1 = d__[ll], abs(d__1));
  487. _starpu_dlas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__);
  488. } else {
  489. sll = (d__1 = d__[m], abs(d__1));
  490. _starpu_dlas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__);
  491. }
  492. /* Test if shift negligible, and if so set to zero */
  493. if (sll > 0.) {
  494. /* Computing 2nd power */
  495. d__1 = shift / sll;
  496. if (d__1 * d__1 < eps) {
  497. shift = 0.;
  498. }
  499. }
  500. }
  501. /* Increment iteration count */
  502. iter = iter + m - ll;
  503. /* If SHIFT = 0, do simplified QR iteration */
  504. if (shift == 0.) {
  505. if (idir == 1) {
  506. /* Chase bulge from top to bottom */
  507. /* Save cosines and sines for later singular vector updates */
  508. cs = 1.;
  509. oldcs = 1.;
  510. i__1 = m - 1;
  511. for (i__ = ll; i__ <= i__1; ++i__) {
  512. d__1 = d__[i__] * cs;
  513. _starpu_dlartg_(&d__1, &e[i__], &cs, &sn, &r__);
  514. if (i__ > ll) {
  515. e[i__ - 1] = oldsn * r__;
  516. }
  517. d__1 = oldcs * r__;
  518. d__2 = d__[i__ + 1] * sn;
  519. _starpu_dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]);
  520. work[i__ - ll + 1] = cs;
  521. work[i__ - ll + 1 + nm1] = sn;
  522. work[i__ - ll + 1 + nm12] = oldcs;
  523. work[i__ - ll + 1 + nm13] = oldsn;
  524. /* L120: */
  525. }
  526. h__ = d__[m] * cs;
  527. d__[m] = h__ * oldcs;
  528. e[m - 1] = h__ * oldsn;
  529. /* Update singular vectors */
  530. if (*ncvt > 0) {
  531. i__1 = m - ll + 1;
  532. _starpu_dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[
  533. ll + vt_dim1], ldvt);
  534. }
  535. if (*nru > 0) {
  536. i__1 = m - ll + 1;
  537. _starpu_dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13
  538. + 1], &u[ll * u_dim1 + 1], ldu);
  539. }
  540. if (*ncc > 0) {
  541. i__1 = m - ll + 1;
  542. _starpu_dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13
  543. + 1], &c__[ll + c_dim1], ldc);
  544. }
  545. /* Test convergence */
  546. if ((d__1 = e[m - 1], abs(d__1)) <= thresh) {
  547. e[m - 1] = 0.;
  548. }
  549. } else {
  550. /* Chase bulge from bottom to top */
  551. /* Save cosines and sines for later singular vector updates */
  552. cs = 1.;
  553. oldcs = 1.;
  554. i__1 = ll + 1;
  555. for (i__ = m; i__ >= i__1; --i__) {
  556. d__1 = d__[i__] * cs;
  557. _starpu_dlartg_(&d__1, &e[i__ - 1], &cs, &sn, &r__);
  558. if (i__ < m) {
  559. e[i__] = oldsn * r__;
  560. }
  561. d__1 = oldcs * r__;
  562. d__2 = d__[i__ - 1] * sn;
  563. _starpu_dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]);
  564. work[i__ - ll] = cs;
  565. work[i__ - ll + nm1] = -sn;
  566. work[i__ - ll + nm12] = oldcs;
  567. work[i__ - ll + nm13] = -oldsn;
  568. /* L130: */
  569. }
  570. h__ = d__[ll] * cs;
  571. d__[ll] = h__ * oldcs;
  572. e[ll] = h__ * oldsn;
  573. /* Update singular vectors */
  574. if (*ncvt > 0) {
  575. i__1 = m - ll + 1;
  576. _starpu_dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[
  577. nm13 + 1], &vt[ll + vt_dim1], ldvt);
  578. }
  579. if (*nru > 0) {
  580. i__1 = m - ll + 1;
  581. _starpu_dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll *
  582. u_dim1 + 1], ldu);
  583. }
  584. if (*ncc > 0) {
  585. i__1 = m - ll + 1;
  586. _starpu_dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[
  587. ll + c_dim1], ldc);
  588. }
  589. /* Test convergence */
  590. if ((d__1 = e[ll], abs(d__1)) <= thresh) {
  591. e[ll] = 0.;
  592. }
  593. }
  594. } else {
  595. /* Use nonzero shift */
  596. if (idir == 1) {
  597. /* Chase bulge from top to bottom */
  598. /* Save cosines and sines for later singular vector updates */
  599. f = ((d__1 = d__[ll], abs(d__1)) - shift) * (d_sign(&c_b49, &d__[
  600. ll]) + shift / d__[ll]);
  601. g = e[ll];
  602. i__1 = m - 1;
  603. for (i__ = ll; i__ <= i__1; ++i__) {
  604. _starpu_dlartg_(&f, &g, &cosr, &sinr, &r__);
  605. if (i__ > ll) {
  606. e[i__ - 1] = r__;
  607. }
  608. f = cosr * d__[i__] + sinr * e[i__];
  609. e[i__] = cosr * e[i__] - sinr * d__[i__];
  610. g = sinr * d__[i__ + 1];
  611. d__[i__ + 1] = cosr * d__[i__ + 1];
  612. _starpu_dlartg_(&f, &g, &cosl, &sinl, &r__);
  613. d__[i__] = r__;
  614. f = cosl * e[i__] + sinl * d__[i__ + 1];
  615. d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__];
  616. if (i__ < m - 1) {
  617. g = sinl * e[i__ + 1];
  618. e[i__ + 1] = cosl * e[i__ + 1];
  619. }
  620. work[i__ - ll + 1] = cosr;
  621. work[i__ - ll + 1 + nm1] = sinr;
  622. work[i__ - ll + 1 + nm12] = cosl;
  623. work[i__ - ll + 1 + nm13] = sinl;
  624. /* L140: */
  625. }
  626. e[m - 1] = f;
  627. /* Update singular vectors */
  628. if (*ncvt > 0) {
  629. i__1 = m - ll + 1;
  630. _starpu_dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[
  631. ll + vt_dim1], ldvt);
  632. }
  633. if (*nru > 0) {
  634. i__1 = m - ll + 1;
  635. _starpu_dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13
  636. + 1], &u[ll * u_dim1 + 1], ldu);
  637. }
  638. if (*ncc > 0) {
  639. i__1 = m - ll + 1;
  640. _starpu_dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13
  641. + 1], &c__[ll + c_dim1], ldc);
  642. }
  643. /* Test convergence */
  644. if ((d__1 = e[m - 1], abs(d__1)) <= thresh) {
  645. e[m - 1] = 0.;
  646. }
  647. } else {
  648. /* Chase bulge from bottom to top */
  649. /* Save cosines and sines for later singular vector updates */
  650. f = ((d__1 = d__[m], abs(d__1)) - shift) * (d_sign(&c_b49, &d__[m]
  651. ) + shift / d__[m]);
  652. g = e[m - 1];
  653. i__1 = ll + 1;
  654. for (i__ = m; i__ >= i__1; --i__) {
  655. _starpu_dlartg_(&f, &g, &cosr, &sinr, &r__);
  656. if (i__ < m) {
  657. e[i__] = r__;
  658. }
  659. f = cosr * d__[i__] + sinr * e[i__ - 1];
  660. e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__];
  661. g = sinr * d__[i__ - 1];
  662. d__[i__ - 1] = cosr * d__[i__ - 1];
  663. _starpu_dlartg_(&f, &g, &cosl, &sinl, &r__);
  664. d__[i__] = r__;
  665. f = cosl * e[i__ - 1] + sinl * d__[i__ - 1];
  666. d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1];
  667. if (i__ > ll + 1) {
  668. g = sinl * e[i__ - 2];
  669. e[i__ - 2] = cosl * e[i__ - 2];
  670. }
  671. work[i__ - ll] = cosr;
  672. work[i__ - ll + nm1] = -sinr;
  673. work[i__ - ll + nm12] = cosl;
  674. work[i__ - ll + nm13] = -sinl;
  675. /* L150: */
  676. }
  677. e[ll] = f;
  678. /* Test convergence */
  679. if ((d__1 = e[ll], abs(d__1)) <= thresh) {
  680. e[ll] = 0.;
  681. }
  682. /* Update singular vectors if desired */
  683. if (*ncvt > 0) {
  684. i__1 = m - ll + 1;
  685. _starpu_dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[
  686. nm13 + 1], &vt[ll + vt_dim1], ldvt);
  687. }
  688. if (*nru > 0) {
  689. i__1 = m - ll + 1;
  690. _starpu_dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll *
  691. u_dim1 + 1], ldu);
  692. }
  693. if (*ncc > 0) {
  694. i__1 = m - ll + 1;
  695. _starpu_dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[
  696. ll + c_dim1], ldc);
  697. }
  698. }
  699. }
  700. /* QR iteration finished, go back and check convergence */
  701. goto L60;
  702. /* All singular values converged, so make them positive */
  703. L160:
  704. i__1 = *n;
  705. for (i__ = 1; i__ <= i__1; ++i__) {
  706. if (d__[i__] < 0.) {
  707. d__[i__] = -d__[i__];
  708. /* Change sign of singular vectors, if desired */
  709. if (*ncvt > 0) {
  710. _starpu_dscal_(ncvt, &c_b72, &vt[i__ + vt_dim1], ldvt);
  711. }
  712. }
  713. /* L170: */
  714. }
  715. /* Sort the singular values into decreasing order (insertion sort on */
  716. /* singular values, but only one transposition per singular vector) */
  717. i__1 = *n - 1;
  718. for (i__ = 1; i__ <= i__1; ++i__) {
  719. /* Scan for smallest D(I) */
  720. isub = 1;
  721. smin = d__[1];
  722. i__2 = *n + 1 - i__;
  723. for (j = 2; j <= i__2; ++j) {
  724. if (d__[j] <= smin) {
  725. isub = j;
  726. smin = d__[j];
  727. }
  728. /* L180: */
  729. }
  730. if (isub != *n + 1 - i__) {
  731. /* Swap singular values and vectors */
  732. d__[isub] = d__[*n + 1 - i__];
  733. d__[*n + 1 - i__] = smin;
  734. if (*ncvt > 0) {
  735. _starpu_dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ +
  736. vt_dim1], ldvt);
  737. }
  738. if (*nru > 0) {
  739. _starpu_dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) *
  740. u_dim1 + 1], &c__1);
  741. }
  742. if (*ncc > 0) {
  743. _starpu_dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ +
  744. c_dim1], ldc);
  745. }
  746. }
  747. /* L190: */
  748. }
  749. goto L220;
  750. /* Maximum number of iterations exceeded, failure to converge */
  751. L200:
  752. *info = 0;
  753. i__1 = *n - 1;
  754. for (i__ = 1; i__ <= i__1; ++i__) {
  755. if (e[i__] != 0.) {
  756. ++(*info);
  757. }
  758. /* L210: */
  759. }
  760. L220:
  761. return 0;
  762. /* End of DBDSQR */
  763. } /* _starpu_dbdsqr_ */