dlaed2.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533
  1. /* dlaed2.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Table of constant values */
  14. static doublereal c_b3 = -1.;
  15. static integer c__1 = 1;
  16. /* Subroutine */ int _starpu_dlaed2_(integer *k, integer *n, integer *n1, doublereal *
  17. d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho,
  18. doublereal *z__, doublereal *dlamda, doublereal *w, doublereal *q2,
  19. integer *indx, integer *indxc, integer *indxp, integer *coltyp,
  20. integer *info)
  21. {
  22. /* System generated locals */
  23. integer q_dim1, q_offset, i__1, i__2;
  24. doublereal d__1, d__2, d__3, d__4;
  25. /* Builtin functions */
  26. double sqrt(doublereal);
  27. /* Local variables */
  28. doublereal c__;
  29. integer i__, j;
  30. doublereal s, t;
  31. integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1;
  32. doublereal eps, tau, tol;
  33. integer psm[4], imax, jmax;
  34. extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *,
  35. doublereal *, integer *, doublereal *, doublereal *);
  36. integer ctot[4];
  37. extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *,
  38. integer *), _starpu_dcopy_(integer *, doublereal *, integer *, doublereal
  39. *, integer *);
  40. extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlamch_(char *);
  41. extern integer _starpu_idamax_(integer *, doublereal *, integer *);
  42. extern /* Subroutine */ int _starpu_dlamrg_(integer *, integer *, doublereal *,
  43. integer *, integer *, integer *), _starpu_dlacpy_(char *, integer *,
  44. integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *);
  45. /* -- LAPACK routine (version 3.2) -- */
  46. /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  47. /* November 2006 */
  48. /* .. Scalar Arguments .. */
  49. /* .. */
  50. /* .. Array Arguments .. */
  51. /* .. */
  52. /* Purpose */
  53. /* ======= */
  54. /* DLAED2 merges the two sets of eigenvalues together into a single */
  55. /* sorted set. Then it tries to deflate the size of the problem. */
  56. /* There are two ways in which deflation can occur: when two or more */
  57. /* eigenvalues are close together or if there is a tiny entry in the */
  58. /* Z vector. For each such occurrence the order of the related secular */
  59. /* equation problem is reduced by one. */
  60. /* Arguments */
  61. /* ========= */
  62. /* K (output) INTEGER */
  63. /* The number of non-deflated eigenvalues, and the order of the */
  64. /* related secular equation. 0 <= K <=N. */
  65. /* N (input) INTEGER */
  66. /* The dimension of the symmetric tridiagonal matrix. N >= 0. */
  67. /* N1 (input) INTEGER */
  68. /* The location of the last eigenvalue in the leading sub-matrix. */
  69. /* min(1,N) <= N1 <= N/2. */
  70. /* D (input/output) DOUBLE PRECISION array, dimension (N) */
  71. /* On entry, D contains the eigenvalues of the two submatrices to */
  72. /* be combined. */
  73. /* On exit, D contains the trailing (N-K) updated eigenvalues */
  74. /* (those which were deflated) sorted into increasing order. */
  75. /* Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) */
  76. /* On entry, Q contains the eigenvectors of two submatrices in */
  77. /* the two square blocks with corners at (1,1), (N1,N1) */
  78. /* and (N1+1, N1+1), (N,N). */
  79. /* On exit, Q contains the trailing (N-K) updated eigenvectors */
  80. /* (those which were deflated) in its last N-K columns. */
  81. /* LDQ (input) INTEGER */
  82. /* The leading dimension of the array Q. LDQ >= max(1,N). */
  83. /* INDXQ (input/output) INTEGER array, dimension (N) */
  84. /* The permutation which separately sorts the two sub-problems */
  85. /* in D into ascending order. Note that elements in the second */
  86. /* half of this permutation must first have N1 added to their */
  87. /* values. Destroyed on exit. */
  88. /* RHO (input/output) DOUBLE PRECISION */
  89. /* On entry, the off-diagonal element associated with the rank-1 */
  90. /* cut which originally split the two submatrices which are now */
  91. /* being recombined. */
  92. /* On exit, RHO has been modified to the value required by */
  93. /* DLAED3. */
  94. /* Z (input) DOUBLE PRECISION array, dimension (N) */
  95. /* On entry, Z contains the updating vector (the last */
  96. /* row of the first sub-eigenvector matrix and the first row of */
  97. /* the second sub-eigenvector matrix). */
  98. /* On exit, the contents of Z have been destroyed by the updating */
  99. /* process. */
  100. /* DLAMDA (output) DOUBLE PRECISION array, dimension (N) */
  101. /* A copy of the first K eigenvalues which will be used by */
  102. /* DLAED3 to form the secular equation. */
  103. /* W (output) DOUBLE PRECISION array, dimension (N) */
  104. /* The first k values of the final deflation-altered z-vector */
  105. /* which will be passed to DLAED3. */
  106. /* Q2 (output) DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2) */
  107. /* A copy of the first K eigenvectors which will be used by */
  108. /* DLAED3 in a matrix multiply (DGEMM) to solve for the new */
  109. /* eigenvectors. */
  110. /* INDX (workspace) INTEGER array, dimension (N) */
  111. /* The permutation used to sort the contents of DLAMDA into */
  112. /* ascending order. */
  113. /* INDXC (output) INTEGER array, dimension (N) */
  114. /* The permutation used to arrange the columns of the deflated */
  115. /* Q matrix into three groups: the first group contains non-zero */
  116. /* elements only at and above N1, the second contains */
  117. /* non-zero elements only below N1, and the third is dense. */
  118. /* INDXP (workspace) INTEGER array, dimension (N) */
  119. /* The permutation used to place deflated values of D at the end */
  120. /* of the array. INDXP(1:K) points to the nondeflated D-values */
  121. /* and INDXP(K+1:N) points to the deflated eigenvalues. */
  122. /* COLTYP (workspace/output) INTEGER array, dimension (N) */
  123. /* During execution, a label which will indicate which of the */
  124. /* following types a column in the Q2 matrix is: */
  125. /* 1 : non-zero in the upper half only; */
  126. /* 2 : dense; */
  127. /* 3 : non-zero in the lower half only; */
  128. /* 4 : deflated. */
  129. /* On exit, COLTYP(i) is the number of columns of type i, */
  130. /* for i=1 to 4 only. */
  131. /* INFO (output) INTEGER */
  132. /* = 0: successful exit. */
  133. /* < 0: if INFO = -i, the i-th argument had an illegal value. */
  134. /* Further Details */
  135. /* =============== */
  136. /* Based on contributions by */
  137. /* Jeff Rutter, Computer Science Division, University of California */
  138. /* at Berkeley, USA */
  139. /* Modified by Francoise Tisseur, University of Tennessee. */
  140. /* ===================================================================== */
  141. /* .. Parameters .. */
  142. /* .. */
  143. /* .. Local Arrays .. */
  144. /* .. */
  145. /* .. Local Scalars .. */
  146. /* .. */
  147. /* .. External Functions .. */
  148. /* .. */
  149. /* .. External Subroutines .. */
  150. /* .. */
  151. /* .. Intrinsic Functions .. */
  152. /* .. */
  153. /* .. Executable Statements .. */
  154. /* Test the input parameters. */
  155. /* Parameter adjustments */
  156. --d__;
  157. q_dim1 = *ldq;
  158. q_offset = 1 + q_dim1;
  159. q -= q_offset;
  160. --indxq;
  161. --z__;
  162. --dlamda;
  163. --w;
  164. --q2;
  165. --indx;
  166. --indxc;
  167. --indxp;
  168. --coltyp;
  169. /* Function Body */
  170. *info = 0;
  171. if (*n < 0) {
  172. *info = -2;
  173. } else if (*ldq < max(1,*n)) {
  174. *info = -6;
  175. } else /* if(complicated condition) */ {
  176. /* Computing MIN */
  177. i__1 = 1, i__2 = *n / 2;
  178. if (min(i__1,i__2) > *n1 || *n / 2 < *n1) {
  179. *info = -3;
  180. }
  181. }
  182. if (*info != 0) {
  183. i__1 = -(*info);
  184. _starpu_xerbla_("DLAED2", &i__1);
  185. return 0;
  186. }
  187. /* Quick return if possible */
  188. if (*n == 0) {
  189. return 0;
  190. }
  191. n2 = *n - *n1;
  192. n1p1 = *n1 + 1;
  193. if (*rho < 0.) {
  194. _starpu_dscal_(&n2, &c_b3, &z__[n1p1], &c__1);
  195. }
  196. /* Normalize z so that norm(z) = 1. Since z is the concatenation of */
  197. /* two normalized vectors, norm2(z) = sqrt(2). */
  198. t = 1. / sqrt(2.);
  199. _starpu_dscal_(n, &t, &z__[1], &c__1);
  200. /* RHO = ABS( norm(z)**2 * RHO ) */
  201. *rho = (d__1 = *rho * 2., abs(d__1));
  202. /* Sort the eigenvalues into increasing order */
  203. i__1 = *n;
  204. for (i__ = n1p1; i__ <= i__1; ++i__) {
  205. indxq[i__] += *n1;
  206. /* L10: */
  207. }
  208. /* re-integrate the deflated parts from the last pass */
  209. i__1 = *n;
  210. for (i__ = 1; i__ <= i__1; ++i__) {
  211. dlamda[i__] = d__[indxq[i__]];
  212. /* L20: */
  213. }
  214. _starpu_dlamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]);
  215. i__1 = *n;
  216. for (i__ = 1; i__ <= i__1; ++i__) {
  217. indx[i__] = indxq[indxc[i__]];
  218. /* L30: */
  219. }
  220. /* Calculate the allowable deflation tolerance */
  221. imax = _starpu_idamax_(n, &z__[1], &c__1);
  222. jmax = _starpu_idamax_(n, &d__[1], &c__1);
  223. eps = _starpu_dlamch_("Epsilon");
  224. /* Computing MAX */
  225. d__3 = (d__1 = d__[jmax], abs(d__1)), d__4 = (d__2 = z__[imax], abs(d__2))
  226. ;
  227. tol = eps * 8. * max(d__3,d__4);
  228. /* If the rank-1 modifier is small enough, no more needs to be done */
  229. /* except to reorganize Q so that its columns correspond with the */
  230. /* elements in D. */
  231. if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) {
  232. *k = 0;
  233. iq2 = 1;
  234. i__1 = *n;
  235. for (j = 1; j <= i__1; ++j) {
  236. i__ = indx[j];
  237. _starpu_dcopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
  238. dlamda[j] = d__[i__];
  239. iq2 += *n;
  240. /* L40: */
  241. }
  242. _starpu_dlacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq);
  243. _starpu_dcopy_(n, &dlamda[1], &c__1, &d__[1], &c__1);
  244. goto L190;
  245. }
  246. /* If there are multiple eigenvalues then the problem deflates. Here */
  247. /* the number of equal eigenvalues are found. As each equal */
  248. /* eigenvalue is found, an elementary reflector is computed to rotate */
  249. /* the corresponding eigensubspace so that the corresponding */
  250. /* components of Z are zero in this new basis. */
  251. i__1 = *n1;
  252. for (i__ = 1; i__ <= i__1; ++i__) {
  253. coltyp[i__] = 1;
  254. /* L50: */
  255. }
  256. i__1 = *n;
  257. for (i__ = n1p1; i__ <= i__1; ++i__) {
  258. coltyp[i__] = 3;
  259. /* L60: */
  260. }
  261. *k = 0;
  262. k2 = *n + 1;
  263. i__1 = *n;
  264. for (j = 1; j <= i__1; ++j) {
  265. nj = indx[j];
  266. if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) {
  267. /* Deflate due to small z component. */
  268. --k2;
  269. coltyp[nj] = 4;
  270. indxp[k2] = nj;
  271. if (j == *n) {
  272. goto L100;
  273. }
  274. } else {
  275. pj = nj;
  276. goto L80;
  277. }
  278. /* L70: */
  279. }
  280. L80:
  281. ++j;
  282. nj = indx[j];
  283. if (j > *n) {
  284. goto L100;
  285. }
  286. if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) {
  287. /* Deflate due to small z component. */
  288. --k2;
  289. coltyp[nj] = 4;
  290. indxp[k2] = nj;
  291. } else {
  292. /* Check if eigenvalues are close enough to allow deflation. */
  293. s = z__[pj];
  294. c__ = z__[nj];
  295. /* Find sqrt(a**2+b**2) without overflow or */
  296. /* destructive underflow. */
  297. tau = _starpu_dlapy2_(&c__, &s);
  298. t = d__[nj] - d__[pj];
  299. c__ /= tau;
  300. s = -s / tau;
  301. if ((d__1 = t * c__ * s, abs(d__1)) <= tol) {
  302. /* Deflation is possible. */
  303. z__[nj] = tau;
  304. z__[pj] = 0.;
  305. if (coltyp[nj] != coltyp[pj]) {
  306. coltyp[nj] = 2;
  307. }
  308. coltyp[pj] = 4;
  309. _starpu_drot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, &
  310. c__, &s);
  311. /* Computing 2nd power */
  312. d__1 = c__;
  313. /* Computing 2nd power */
  314. d__2 = s;
  315. t = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2);
  316. /* Computing 2nd power */
  317. d__1 = s;
  318. /* Computing 2nd power */
  319. d__2 = c__;
  320. d__[nj] = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2);
  321. d__[pj] = t;
  322. --k2;
  323. i__ = 1;
  324. L90:
  325. if (k2 + i__ <= *n) {
  326. if (d__[pj] < d__[indxp[k2 + i__]]) {
  327. indxp[k2 + i__ - 1] = indxp[k2 + i__];
  328. indxp[k2 + i__] = pj;
  329. ++i__;
  330. goto L90;
  331. } else {
  332. indxp[k2 + i__ - 1] = pj;
  333. }
  334. } else {
  335. indxp[k2 + i__ - 1] = pj;
  336. }
  337. pj = nj;
  338. } else {
  339. ++(*k);
  340. dlamda[*k] = d__[pj];
  341. w[*k] = z__[pj];
  342. indxp[*k] = pj;
  343. pj = nj;
  344. }
  345. }
  346. goto L80;
  347. L100:
  348. /* Record the last eigenvalue. */
  349. ++(*k);
  350. dlamda[*k] = d__[pj];
  351. w[*k] = z__[pj];
  352. indxp[*k] = pj;
  353. /* Count up the total number of the various types of columns, then */
  354. /* form a permutation which positions the four column types into */
  355. /* four uniform groups (although one or more of these groups may be */
  356. /* empty). */
  357. for (j = 1; j <= 4; ++j) {
  358. ctot[j - 1] = 0;
  359. /* L110: */
  360. }
  361. i__1 = *n;
  362. for (j = 1; j <= i__1; ++j) {
  363. ct = coltyp[j];
  364. ++ctot[ct - 1];
  365. /* L120: */
  366. }
  367. /* PSM(*) = Position in SubMatrix (of types 1 through 4) */
  368. psm[0] = 1;
  369. psm[1] = ctot[0] + 1;
  370. psm[2] = psm[1] + ctot[1];
  371. psm[3] = psm[2] + ctot[2];
  372. *k = *n - ctot[3];
  373. /* Fill out the INDXC array so that the permutation which it induces */
  374. /* will place all type-1 columns first, all type-2 columns next, */
  375. /* then all type-3's, and finally all type-4's. */
  376. i__1 = *n;
  377. for (j = 1; j <= i__1; ++j) {
  378. js = indxp[j];
  379. ct = coltyp[js];
  380. indx[psm[ct - 1]] = js;
  381. indxc[psm[ct - 1]] = j;
  382. ++psm[ct - 1];
  383. /* L130: */
  384. }
  385. /* Sort the eigenvalues and corresponding eigenvectors into DLAMDA */
  386. /* and Q2 respectively. The eigenvalues/vectors which were not */
  387. /* deflated go into the first K slots of DLAMDA and Q2 respectively, */
  388. /* while those which were deflated go into the last N - K slots. */
  389. i__ = 1;
  390. iq1 = 1;
  391. iq2 = (ctot[0] + ctot[1]) * *n1 + 1;
  392. i__1 = ctot[0];
  393. for (j = 1; j <= i__1; ++j) {
  394. js = indx[i__];
  395. _starpu_dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
  396. z__[i__] = d__[js];
  397. ++i__;
  398. iq1 += *n1;
  399. /* L140: */
  400. }
  401. i__1 = ctot[1];
  402. for (j = 1; j <= i__1; ++j) {
  403. js = indx[i__];
  404. _starpu_dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
  405. _starpu_dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
  406. z__[i__] = d__[js];
  407. ++i__;
  408. iq1 += *n1;
  409. iq2 += n2;
  410. /* L150: */
  411. }
  412. i__1 = ctot[2];
  413. for (j = 1; j <= i__1; ++j) {
  414. js = indx[i__];
  415. _starpu_dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
  416. z__[i__] = d__[js];
  417. ++i__;
  418. iq2 += n2;
  419. /* L160: */
  420. }
  421. iq1 = iq2;
  422. i__1 = ctot[3];
  423. for (j = 1; j <= i__1; ++j) {
  424. js = indx[i__];
  425. _starpu_dcopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
  426. iq2 += *n;
  427. z__[i__] = d__[js];
  428. ++i__;
  429. /* L170: */
  430. }
  431. /* The deflated eigenvalues and their corresponding vectors go back */
  432. /* into the last N - K slots of D and Q respectively. */
  433. _starpu_dlacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq);
  434. i__1 = *n - *k;
  435. _starpu_dcopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1);
  436. /* Copy CTOT into COLTYP for referencing in DLAED3. */
  437. for (j = 1; j <= 4; ++j) {
  438. coltyp[j] = ctot[j - 1];
  439. /* L180: */
  440. }
  441. L190:
  442. return 0;
  443. /* End of DLAED2 */
  444. } /* _starpu_dlaed2_ */