dlaed8.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476
  1. /* dlaed8.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Table of constant values */
  14. static doublereal c_b3 = -1.;
  15. static integer c__1 = 1;
  16. /* Subroutine */ int _starpu_dlaed8_(integer *icompq, integer *k, integer *n, integer
  17. *qsiz, doublereal *d__, doublereal *q, integer *ldq, integer *indxq,
  18. doublereal *rho, integer *cutpnt, doublereal *z__, doublereal *dlamda,
  19. doublereal *q2, integer *ldq2, doublereal *w, integer *perm, integer
  20. *givptr, integer *givcol, doublereal *givnum, integer *indxp, integer
  21. *indx, integer *info)
  22. {
  23. /* System generated locals */
  24. integer q_dim1, q_offset, q2_dim1, q2_offset, i__1;
  25. doublereal d__1;
  26. /* Builtin functions */
  27. double sqrt(doublereal);
  28. /* Local variables */
  29. doublereal c__;
  30. integer i__, j;
  31. doublereal s, t;
  32. integer k2, n1, n2, jp, n1p1;
  33. doublereal eps, tau, tol;
  34. integer jlam, imax, jmax;
  35. extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *,
  36. doublereal *, integer *, doublereal *, doublereal *), _starpu_dscal_(
  37. integer *, doublereal *, doublereal *, integer *), _starpu_dcopy_(integer
  38. *, doublereal *, integer *, doublereal *, integer *);
  39. extern doublereal _starpu_dlapy2_(doublereal *, doublereal *), _starpu_dlamch_(char *);
  40. extern integer _starpu_idamax_(integer *, doublereal *, integer *);
  41. extern /* Subroutine */ int _starpu_dlamrg_(integer *, integer *, doublereal *,
  42. integer *, integer *, integer *), _starpu_dlacpy_(char *, integer *,
  43. integer *, doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *, integer *);
  44. /* -- LAPACK routine (version 3.2) -- */
  45. /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  46. /* November 2006 */
  47. /* .. Scalar Arguments .. */
  48. /* .. */
  49. /* .. Array Arguments .. */
  50. /* .. */
  51. /* Purpose */
  52. /* ======= */
  53. /* DLAED8 merges the two sets of eigenvalues together into a single */
  54. /* sorted set. Then it tries to deflate the size of the problem. */
  55. /* There are two ways in which deflation can occur: when two or more */
  56. /* eigenvalues are close together or if there is a tiny element in the */
  57. /* Z vector. For each such occurrence the order of the related secular */
  58. /* equation problem is reduced by one. */
  59. /* Arguments */
  60. /* ========= */
  61. /* ICOMPQ (input) INTEGER */
  62. /* = 0: Compute eigenvalues only. */
  63. /* = 1: Compute eigenvectors of original dense symmetric matrix */
  64. /* also. On entry, Q contains the orthogonal matrix used */
  65. /* to reduce the original matrix to tridiagonal form. */
  66. /* K (output) INTEGER */
  67. /* The number of non-deflated eigenvalues, and the order of the */
  68. /* related secular equation. */
  69. /* N (input) INTEGER */
  70. /* The dimension of the symmetric tridiagonal matrix. N >= 0. */
  71. /* QSIZ (input) INTEGER */
  72. /* The dimension of the orthogonal matrix used to reduce */
  73. /* the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. */
  74. /* D (input/output) DOUBLE PRECISION array, dimension (N) */
  75. /* On entry, the eigenvalues of the two submatrices to be */
  76. /* combined. On exit, the trailing (N-K) updated eigenvalues */
  77. /* (those which were deflated) sorted into increasing order. */
  78. /* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) */
  79. /* If ICOMPQ = 0, Q is not referenced. Otherwise, */
  80. /* on entry, Q contains the eigenvectors of the partially solved */
  81. /* system which has been previously updated in matrix */
  82. /* multiplies with other partially solved eigensystems. */
  83. /* On exit, Q contains the trailing (N-K) updated eigenvectors */
  84. /* (those which were deflated) in its last N-K columns. */
  85. /* LDQ (input) INTEGER */
  86. /* The leading dimension of the array Q. LDQ >= max(1,N). */
  87. /* INDXQ (input) INTEGER array, dimension (N) */
  88. /* The permutation which separately sorts the two sub-problems */
  89. /* in D into ascending order. Note that elements in the second */
  90. /* half of this permutation must first have CUTPNT added to */
  91. /* their values in order to be accurate. */
  92. /* RHO (input/output) DOUBLE PRECISION */
  93. /* On entry, the off-diagonal element associated with the rank-1 */
  94. /* cut which originally split the two submatrices which are now */
  95. /* being recombined. */
  96. /* On exit, RHO has been modified to the value required by */
  97. /* DLAED3. */
  98. /* CUTPNT (input) INTEGER */
  99. /* The location of the last eigenvalue in the leading */
  100. /* sub-matrix. min(1,N) <= CUTPNT <= N. */
  101. /* Z (input) DOUBLE PRECISION array, dimension (N) */
  102. /* On entry, Z contains the updating vector (the last row of */
  103. /* the first sub-eigenvector matrix and the first row of the */
  104. /* second sub-eigenvector matrix). */
  105. /* On exit, the contents of Z are destroyed by the updating */
  106. /* process. */
  107. /* DLAMDA (output) DOUBLE PRECISION array, dimension (N) */
  108. /* A copy of the first K eigenvalues which will be used by */
  109. /* DLAED3 to form the secular equation. */
  110. /* Q2 (output) DOUBLE PRECISION array, dimension (LDQ2,N) */
  111. /* If ICOMPQ = 0, Q2 is not referenced. Otherwise, */
  112. /* a copy of the first K eigenvectors which will be used by */
  113. /* DLAED7 in a matrix multiply (DGEMM) to update the new */
  114. /* eigenvectors. */
  115. /* LDQ2 (input) INTEGER */
  116. /* The leading dimension of the array Q2. LDQ2 >= max(1,N). */
  117. /* W (output) DOUBLE PRECISION array, dimension (N) */
  118. /* The first k values of the final deflation-altered z-vector and */
  119. /* will be passed to DLAED3. */
  120. /* PERM (output) INTEGER array, dimension (N) */
  121. /* The permutations (from deflation and sorting) to be applied */
  122. /* to each eigenblock. */
  123. /* GIVPTR (output) INTEGER */
  124. /* The number of Givens rotations which took place in this */
  125. /* subproblem. */
  126. /* GIVCOL (output) INTEGER array, dimension (2, N) */
  127. /* Each pair of numbers indicates a pair of columns to take place */
  128. /* in a Givens rotation. */
  129. /* GIVNUM (output) DOUBLE PRECISION array, dimension (2, N) */
  130. /* Each number indicates the S value to be used in the */
  131. /* corresponding Givens rotation. */
  132. /* INDXP (workspace) INTEGER array, dimension (N) */
  133. /* The permutation used to place deflated values of D at the end */
  134. /* of the array. INDXP(1:K) points to the nondeflated D-values */
  135. /* and INDXP(K+1:N) points to the deflated eigenvalues. */
  136. /* INDX (workspace) INTEGER array, dimension (N) */
  137. /* The permutation used to sort the contents of D into ascending */
  138. /* order. */
  139. /* INFO (output) INTEGER */
  140. /* = 0: successful exit. */
  141. /* < 0: if INFO = -i, the i-th argument had an illegal value. */
  142. /* Further Details */
  143. /* =============== */
  144. /* Based on contributions by */
  145. /* Jeff Rutter, Computer Science Division, University of California */
  146. /* at Berkeley, USA */
  147. /* ===================================================================== */
  148. /* .. Parameters .. */
  149. /* .. */
  150. /* .. Local Scalars .. */
  151. /* .. */
  152. /* .. External Functions .. */
  153. /* .. */
  154. /* .. External Subroutines .. */
  155. /* .. */
  156. /* .. Intrinsic Functions .. */
  157. /* .. */
  158. /* .. Executable Statements .. */
  159. /* Test the input parameters. */
  160. /* Parameter adjustments */
  161. --d__;
  162. q_dim1 = *ldq;
  163. q_offset = 1 + q_dim1;
  164. q -= q_offset;
  165. --indxq;
  166. --z__;
  167. --dlamda;
  168. q2_dim1 = *ldq2;
  169. q2_offset = 1 + q2_dim1;
  170. q2 -= q2_offset;
  171. --w;
  172. --perm;
  173. givcol -= 3;
  174. givnum -= 3;
  175. --indxp;
  176. --indx;
  177. /* Function Body */
  178. *info = 0;
  179. if (*icompq < 0 || *icompq > 1) {
  180. *info = -1;
  181. } else if (*n < 0) {
  182. *info = -3;
  183. } else if (*icompq == 1 && *qsiz < *n) {
  184. *info = -4;
  185. } else if (*ldq < max(1,*n)) {
  186. *info = -7;
  187. } else if (*cutpnt < min(1,*n) || *cutpnt > *n) {
  188. *info = -10;
  189. } else if (*ldq2 < max(1,*n)) {
  190. *info = -14;
  191. }
  192. if (*info != 0) {
  193. i__1 = -(*info);
  194. _starpu_xerbla_("DLAED8", &i__1);
  195. return 0;
  196. }
  197. /* Quick return if possible */
  198. if (*n == 0) {
  199. return 0;
  200. }
  201. n1 = *cutpnt;
  202. n2 = *n - n1;
  203. n1p1 = n1 + 1;
  204. if (*rho < 0.) {
  205. _starpu_dscal_(&n2, &c_b3, &z__[n1p1], &c__1);
  206. }
  207. /* Normalize z so that norm(z) = 1 */
  208. t = 1. / sqrt(2.);
  209. i__1 = *n;
  210. for (j = 1; j <= i__1; ++j) {
  211. indx[j] = j;
  212. /* L10: */
  213. }
  214. _starpu_dscal_(n, &t, &z__[1], &c__1);
  215. *rho = (d__1 = *rho * 2., abs(d__1));
  216. /* Sort the eigenvalues into increasing order */
  217. i__1 = *n;
  218. for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) {
  219. indxq[i__] += *cutpnt;
  220. /* L20: */
  221. }
  222. i__1 = *n;
  223. for (i__ = 1; i__ <= i__1; ++i__) {
  224. dlamda[i__] = d__[indxq[i__]];
  225. w[i__] = z__[indxq[i__]];
  226. /* L30: */
  227. }
  228. i__ = 1;
  229. j = *cutpnt + 1;
  230. _starpu_dlamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]);
  231. i__1 = *n;
  232. for (i__ = 1; i__ <= i__1; ++i__) {
  233. d__[i__] = dlamda[indx[i__]];
  234. z__[i__] = w[indx[i__]];
  235. /* L40: */
  236. }
  237. /* Calculate the allowable deflation tolerence */
  238. imax = _starpu_idamax_(n, &z__[1], &c__1);
  239. jmax = _starpu_idamax_(n, &d__[1], &c__1);
  240. eps = _starpu_dlamch_("Epsilon");
  241. tol = eps * 8. * (d__1 = d__[jmax], abs(d__1));
  242. /* If the rank-1 modifier is small enough, no more needs to be done */
  243. /* except to reorganize Q so that its columns correspond with the */
  244. /* elements in D. */
  245. if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) {
  246. *k = 0;
  247. if (*icompq == 0) {
  248. i__1 = *n;
  249. for (j = 1; j <= i__1; ++j) {
  250. perm[j] = indxq[indx[j]];
  251. /* L50: */
  252. }
  253. } else {
  254. i__1 = *n;
  255. for (j = 1; j <= i__1; ++j) {
  256. perm[j] = indxq[indx[j]];
  257. _starpu_dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1
  258. + 1], &c__1);
  259. /* L60: */
  260. }
  261. _starpu_dlacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq);
  262. }
  263. return 0;
  264. }
  265. /* If there are multiple eigenvalues then the problem deflates. Here */
  266. /* the number of equal eigenvalues are found. As each equal */
  267. /* eigenvalue is found, an elementary reflector is computed to rotate */
  268. /* the corresponding eigensubspace so that the corresponding */
  269. /* components of Z are zero in this new basis. */
  270. *k = 0;
  271. *givptr = 0;
  272. k2 = *n + 1;
  273. i__1 = *n;
  274. for (j = 1; j <= i__1; ++j) {
  275. if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) {
  276. /* Deflate due to small z component. */
  277. --k2;
  278. indxp[k2] = j;
  279. if (j == *n) {
  280. goto L110;
  281. }
  282. } else {
  283. jlam = j;
  284. goto L80;
  285. }
  286. /* L70: */
  287. }
  288. L80:
  289. ++j;
  290. if (j > *n) {
  291. goto L100;
  292. }
  293. if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) {
  294. /* Deflate due to small z component. */
  295. --k2;
  296. indxp[k2] = j;
  297. } else {
  298. /* Check if eigenvalues are close enough to allow deflation. */
  299. s = z__[jlam];
  300. c__ = z__[j];
  301. /* Find sqrt(a**2+b**2) without overflow or */
  302. /* destructive underflow. */
  303. tau = _starpu_dlapy2_(&c__, &s);
  304. t = d__[j] - d__[jlam];
  305. c__ /= tau;
  306. s = -s / tau;
  307. if ((d__1 = t * c__ * s, abs(d__1)) <= tol) {
  308. /* Deflation is possible. */
  309. z__[j] = tau;
  310. z__[jlam] = 0.;
  311. /* Record the appropriate Givens rotation */
  312. ++(*givptr);
  313. givcol[(*givptr << 1) + 1] = indxq[indx[jlam]];
  314. givcol[(*givptr << 1) + 2] = indxq[indx[j]];
  315. givnum[(*givptr << 1) + 1] = c__;
  316. givnum[(*givptr << 1) + 2] = s;
  317. if (*icompq == 1) {
  318. _starpu_drot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[
  319. indxq[indx[j]] * q_dim1 + 1], &c__1, &c__, &s);
  320. }
  321. t = d__[jlam] * c__ * c__ + d__[j] * s * s;
  322. d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__;
  323. d__[jlam] = t;
  324. --k2;
  325. i__ = 1;
  326. L90:
  327. if (k2 + i__ <= *n) {
  328. if (d__[jlam] < d__[indxp[k2 + i__]]) {
  329. indxp[k2 + i__ - 1] = indxp[k2 + i__];
  330. indxp[k2 + i__] = jlam;
  331. ++i__;
  332. goto L90;
  333. } else {
  334. indxp[k2 + i__ - 1] = jlam;
  335. }
  336. } else {
  337. indxp[k2 + i__ - 1] = jlam;
  338. }
  339. jlam = j;
  340. } else {
  341. ++(*k);
  342. w[*k] = z__[jlam];
  343. dlamda[*k] = d__[jlam];
  344. indxp[*k] = jlam;
  345. jlam = j;
  346. }
  347. }
  348. goto L80;
  349. L100:
  350. /* Record the last eigenvalue. */
  351. ++(*k);
  352. w[*k] = z__[jlam];
  353. dlamda[*k] = d__[jlam];
  354. indxp[*k] = jlam;
  355. L110:
  356. /* Sort the eigenvalues and corresponding eigenvectors into DLAMDA */
  357. /* and Q2 respectively. The eigenvalues/vectors which were not */
  358. /* deflated go into the first K slots of DLAMDA and Q2 respectively, */
  359. /* while those which were deflated go into the last N - K slots. */
  360. if (*icompq == 0) {
  361. i__1 = *n;
  362. for (j = 1; j <= i__1; ++j) {
  363. jp = indxp[j];
  364. dlamda[j] = d__[jp];
  365. perm[j] = indxq[indx[jp]];
  366. /* L120: */
  367. }
  368. } else {
  369. i__1 = *n;
  370. for (j = 1; j <= i__1; ++j) {
  371. jp = indxp[j];
  372. dlamda[j] = d__[jp];
  373. perm[j] = indxq[indx[jp]];
  374. _starpu_dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1]
  375. , &c__1);
  376. /* L130: */
  377. }
  378. }
  379. /* The deflated eigenvalues and their corresponding vectors go back */
  380. /* into the last N - K slots of D and Q respectively. */
  381. if (*k < *n) {
  382. if (*icompq == 0) {
  383. i__1 = *n - *k;
  384. _starpu_dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1);
  385. } else {
  386. i__1 = *n - *k;
  387. _starpu_dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1);
  388. i__1 = *n - *k;
  389. _starpu_dlacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(*
  390. k + 1) * q_dim1 + 1], ldq);
  391. }
  392. }
  393. return 0;
  394. /* End of DLAED8 */
  395. } /* _starpu_dlaed8_ */