dlaed0.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441
  1. /* dlaed0.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Table of constant values */
  /* Every callee in this file takes its arguments by address, so the */
  /* literal constants used in those calls are kept as file-scope statics. */
  14. static integer c__9 = 9; /* first argument of the ilaenv query that yields smlsiz */
  15. static integer c__0 = 0; /* passed for the four trailing integer arguments of ilaenv */
  16. static integer c__2 = 2; /* base handed to pow_ii when lgn is computed */
  17. static doublereal c_b23 = 1.; /* scalar passed before the two input matrices in the dgemm call */
  18. static doublereal c_b24 = 0.; /* scalar passed before the output matrix in the dgemm call */
  19. static integer c__1 = 1; /* increment argument of every dcopy call */
  /* Divide and conquer driver for the symmetric tridiagonal eigenproblem. */
  /* The matrix is cut into leaf subproblems, each leaf is solved with */
  /* dsteqr, and neighbouring results are merged pairwise (dlaed1 when */
  /* ICOMPQ = 2, dlaed7 otherwise) until one problem of size N remains. */
  /* The function always returns 0; the outcome is reported through *info. */
  20. /* Subroutine */ int _starpu_dlaed0_(integer *icompq, integer *qsiz, integer *n,
  21. doublereal *d__, doublereal *e, doublereal *q, integer *ldq,
  22. doublereal *qstore, integer *ldqs, doublereal *work, integer *iwork,
  23. integer *info)
  24. {
  25. /* System generated locals */
  /* q_dim1 / qstore_dim1 hold the leading dimensions, the *_offset values */
  /* are used to shift the array pointers, and i__1 / i__2 / d__1 are */
  /* scratch temporaries for loop bounds and intermediate values. */
  26. integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2;
  27. doublereal d__1;
  28. /* Builtin functions */
  29. double log(doublereal);
  30. integer pow_ii(integer *, integer *);
  31. /* Local variables */
  32. integer i__, j, k, iq, lgn, msd2, smm1, spm1, spm2;
  33. doublereal temp;
  34. integer curr;
  35. extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *,
  36. integer *, doublereal *, doublereal *, integer *, doublereal *,
  37. integer *, doublereal *, doublereal *, integer *);
  38. integer iperm;
  39. extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *,
  40. doublereal *, integer *);
  41. integer indxq, iwrem;
  42. extern /* Subroutine */ int _starpu_dlaed1_(integer *, doublereal *, doublereal *,
  43. integer *, integer *, doublereal *, integer *, doublereal *,
  44. integer *, integer *);
  45. integer iqptr;
  46. extern /* Subroutine */ int _starpu_dlaed7_(integer *, integer *, integer *,
  47. integer *, integer *, integer *, doublereal *, doublereal *,
  48. integer *, integer *, doublereal *, integer *, doublereal *,
  49. integer *, integer *, integer *, integer *, integer *, doublereal
  50. *, doublereal *, integer *, integer *);
  51. integer tlvls;
  52. extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *,
  53. doublereal *, integer *, doublereal *, integer *);
  54. integer igivcl;
  55. extern /* Subroutine */ int _starpu_xerbla_(char *, integer *);
  56. extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *,
  57. integer *, integer *);
  58. integer igivnm, submat, curprb, subpbs, igivpt;
  59. extern /* Subroutine */ int _starpu_dsteqr_(char *, integer *, doublereal *,
  60. doublereal *, doublereal *, integer *, doublereal *, integer *);
  61. integer curlvl, matsiz, iprmpt, smlsiz;
  62. /* -- LAPACK routine (version 3.2) -- */
  63. /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd. */
  64. /* November 2006 */
  65. /* .. Scalar Arguments .. */
  66. /* .. */
  67. /* .. Array Arguments .. */
  68. /* .. */
  69. /* Purpose */
  70. /* ======= */
  71. /* DLAED0 computes all eigenvalues and corresponding eigenvectors of a */
  72. /* symmetric tridiagonal matrix using the divide and conquer method. */
  73. /* Arguments */
  74. /* ========= */
  75. /* ICOMPQ (input) INTEGER */
  76. /*   = 0: Compute eigenvalues only. */
  77. /*   = 1: Compute eigenvectors of original dense symmetric matrix */
  78. /*        also. On entry, Q contains the orthogonal matrix used */
  79. /*        to reduce the original matrix to tridiagonal form. */
  80. /*   = 2: Compute eigenvalues and eigenvectors of tridiagonal */
  81. /*        matrix. */
  82. /* QSIZ (input) INTEGER */
  83. /*   The dimension of the orthogonal matrix used to reduce */
  84. /*   the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. */
  85. /* N (input) INTEGER */
  86. /*   The dimension of the symmetric tridiagonal matrix. N >= 0. */
  87. /* D (input/output) DOUBLE PRECISION array, dimension (N) */
  88. /*   On entry, the main diagonal of the tridiagonal matrix. */
  89. /*   On exit, its eigenvalues. */
  90. /* E (input) DOUBLE PRECISION array, dimension (N-1) */
  91. /*   The off-diagonal elements of the tridiagonal matrix. */
  92. /*   On exit, E has been destroyed. */
  93. /* Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) */
  94. /*   On entry, Q must contain an N-by-N orthogonal matrix. */
  95. /*   If ICOMPQ = 0 Q is not referenced. */
  96. /*   If ICOMPQ = 1 On entry, Q is a subset of the columns of the */
  97. /*                 orthogonal matrix used to reduce the full */
  98. /*                 matrix to tridiagonal form corresponding to */
  99. /*                 the subset of the full matrix which is being */
  100. /*                 decomposed at this time. */
  101. /*   If ICOMPQ = 2 On entry, Q will be the identity matrix. */
  102. /*                 On exit, Q contains the eigenvectors of the */
  103. /*                 tridiagonal matrix. */
  104. /* LDQ (input) INTEGER */
  105. /*   The leading dimension of the array Q. If eigenvectors are */
  106. /*   desired, then LDQ >= max(1,N). In any case, LDQ >= 1. */
  /*   NOTE(review): the argument check below rejects LDQ < max(1,N) */
  /*   for every value of ICOMPQ, which is stricter than this text. */
  107. /* QSTORE (workspace) DOUBLE PRECISION array, dimension (LDQS, N) */
  108. /*   Referenced only when ICOMPQ = 1. Used to store parts of */
  109. /*   the eigenvector matrix when the updating matrix multiplies */
  110. /*   take place. */
  111. /* LDQS (input) INTEGER */
  112. /*   The leading dimension of the array QSTORE. If ICOMPQ = 1, */
  113. /*   then LDQS >= max(1,N). In any case, LDQS >= 1. */
  /*   NOTE(review): the argument check below rejects LDQS < max(1,N) */
  /*   for every value of ICOMPQ, which is stricter than this text. */
  114. /* WORK (workspace) DOUBLE PRECISION array, */
  115. /*   If ICOMPQ = 0 or 1, the dimension of WORK must be at least */
  116. /*   1 + 3*N + 2*N*lg N + 2*N**2 */
  117. /*   ( lg( N ) = smallest integer k */
  118. /*   such that 2^k >= N ) */
  119. /*   If ICOMPQ = 2, the dimension of WORK must be at least */
  120. /*   4*N + N**2. */
  121. /* IWORK (workspace) INTEGER array, */
  122. /*   If ICOMPQ = 0 or 1, the dimension of IWORK must be at least */
  123. /*   6 + 6*N + 5*N*lg N. */
  124. /*   ( lg( N ) = smallest integer k */
  125. /*   such that 2^k >= N ) */
  126. /*   If ICOMPQ = 2, the dimension of IWORK must be at least */
  127. /*   3 + 5*N. */
  128. /* INFO (output) INTEGER */
  129. /*   = 0: successful exit. */
  130. /*   < 0: if INFO = -i, the i-th argument had an illegal value. */
  131. /*   > 0: The algorithm failed to compute an eigenvalue while */
  132. /*        working on the submatrix lying in rows and columns */
  133. /*        INFO/(N+1) through mod(INFO,N+1). */
  /*        The same encoding is produced when a merge call (dlaed1 or */
  /*        dlaed7) returns a nonzero info: its value is replaced. */
  134. /* Further Details */
  135. /* =============== */
  136. /* Based on contributions by */
  137. /* Jeff Rutter, Computer Science Division, University of California */
  138. /* at Berkeley, USA */
  139. /* ===================================================================== */
  140. /* .. Parameters .. */
  141. /* .. */
  142. /* .. Local Scalars .. */
  143. /* .. */
  144. /* .. External Subroutines .. */
  145. /* .. */
  146. /* .. External Functions .. */
  147. /* .. */
  148. /* .. Intrinsic Functions .. */
  149. /* .. */
  150. /* .. Executable Statements .. */
  151. /* Test the input parameters. */
  152. /* Parameter adjustments */
  /* Each pointer is moved back so the arrays can be indexed from 1. */
  /* For the two matrices the shift is 1 + leading dimension, so element */
  /* (i,j) is addressed as q[i + j * q_dim1] (column-major storage). */
  153. --d__;
  154. --e;
  155. q_dim1 = *ldq;
  156. q_offset = 1 + q_dim1;
  157. q -= q_offset;
  158. qstore_dim1 = *ldqs;
  159. qstore_offset = 1 + qstore_dim1;
  160. qstore -= qstore_offset;
  161. --work;
  162. --iwork;
  163. /* Function Body */
  /* Only the first failing test is recorded; the negative value is the */
  /* position of the offending argument in the parameter list. */
  164. *info = 0;
  165. if (*icompq < 0 || *icompq > 2) {
  166. *info = -1;
  167. } else if (*icompq == 1 && *qsiz < max(0,*n)) {
  168. *info = -2;
  169. } else if (*n < 0) {
  170. *info = -3;
  171. } else if (*ldq < max(1,*n)) {
  172. *info = -7;
  173. } else if (*ldqs < max(1,*n)) {
  174. *info = -9;
  175. }
  176. if (*info != 0) {
  177. i__1 = -(*info); /* xerbla receives the positive argument position */
  178. _starpu_xerbla_("DLAED0", &i__1);
  179. return 0;
  180. }
  181. /* Quick return if possible */
  182. if (*n == 0) {
  183. return 0;
  184. }
  /* smlsiz is the size threshold used by the splitting loop below: a */
  /* subproblem larger than smlsiz is split again. */
  185. smlsiz = _starpu_ilaenv_(&c__9, "DLAED0", " ", &c__0, &c__0, &c__0, &c__0);
  186. /* Determine the size and placement of the submatrices, and save in */
  187. /* the leading elements of IWORK. */
  188. iwork[1] = *n;
  189. subpbs = 1; /* current number of subproblems */
  190. tlvls = 0; /* number of splitting passes performed */
  191. L10:
  /* Loop while the last stored size exceeds smlsiz. Each pass replaces */
  /* size iwork[j] by its two halves: the rounded-down half at 2j-1 and */
  /* the rounded-up half at 2j. j runs downwards so that every iwork[j] */
  /* is read before its slot is overwritten. */
  192. if (iwork[subpbs] > smlsiz) {
  193. for (j = subpbs; j >= 1; --j) {
  194. iwork[j * 2] = (iwork[j] + 1) / 2;
  195. iwork[(j << 1) - 1] = iwork[j] / 2;
  196. /* L20: */
  197. }
  198. ++tlvls;
  199. subpbs <<= 1;
  200. goto L10;
  201. }
  /* Running sum: iwork[j] changes from the size of subproblem j to the */
  /* index of its last row. */
  202. i__1 = subpbs;
  203. for (j = 2; j <= i__1; ++j) {
  204. iwork[j] += iwork[j - 1];
  205. /* L30: */
  206. }
  207. /* Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 */
  208. /* using rank-1 modifications (cuts). */
  209. spm1 = subpbs - 1;
  210. i__1 = spm1;
  211. for (i__ = 1; i__ <= i__1; ++i__) {
  212. submat = iwork[i__] + 1; /* first row after the cut */
  213. smm1 = submat - 1; /* last row before the cut */
  /* The absolute value of the off-diagonal element at the cut is */
  /* subtracted from the diagonal entry on each side of the cut. */
  214. d__[smm1] -= (d__1 = e[smm1], abs(d__1));
  215. d__[submat] -= (d__1 = e[smm1], abs(d__1));
  216. /* L40: */
  217. }
  /* iwork[indxq + 1 .. indxq + n] holds the index array that is filled */
  /* per leaf below, handed to the merge routines, and finally used to */
  /* reorder the results. */
  218. indxq = (*n << 2) + 3;
  219. if (*icompq != 2) {
  220. /* Set up workspaces for eigenvalues only/accumulate new vectors */
  221. /* routine */
  /* lgn starts as the truncated value of log(n)/log(2) and is raised */
  /* while 2**lgn < n. The same test is written twice. */
  /* NOTE(review): presumably the repeat covers a floating-point log */
  /* that comes out low -- confirm against the Fortran source. */
  222. temp = log((doublereal) (*n)) / log(2.);
  223. lgn = (integer) temp;
  224. if (pow_ii(&c__2, &lgn) < *n) {
  225. ++lgn;
  226. }
  227. if (pow_ii(&c__2, &lgn) < *n) {
  228. ++lgn;
  229. }
  /* Start offsets of the regions of iwork that are passed to dlaed7. */
  230. iprmpt = indxq + *n + 1;
  231. iperm = iprmpt + *n * lgn;
  232. iqptr = iperm + *n * lgn;
  233. igivpt = iqptr + *n + 2;
  234. igivcl = igivpt + *n * lgn;
  /* Start offsets of the regions of work: igivnm at 1, iq after */
  /* 2*n*lgn entries, iwrem after a further n*n + 1 entries. */
  235. igivnm = 1;
  236. iq = igivnm + (*n << 1) * lgn;
  237. /* Computing 2nd power */
  238. i__1 = *n;
  239. iwrem = iq + i__1 * i__1 + 1;
  240. /* Initialize pointers */
  241. i__1 = subpbs;
  242. for (i__ = 0; i__ <= i__1; ++i__) {
  243. iwork[iprmpt + i__] = 1;
  244. iwork[igivpt + i__] = 1;
  245. /* L50: */
  246. }
  247. iwork[iqptr] = 1;
  248. }
  249. /* Solve each submatrix eigenproblem at the bottom of the divide and */
  250. /* conquer tree. */
  251. curr = 0; /* counts the leaves stored so far in the iq region of work */
  252. i__1 = spm1;
  253. for (i__ = 0; i__ <= i__1; ++i__) {
  /* submat is the first row of leaf i__ and matsiz its size, both */
  /* derived from the cumulative end indices in iwork. */
  254. if (i__ == 0) {
  255. submat = 1;
  256. matsiz = iwork[1];
  257. } else {
  258. submat = iwork[i__] + 1;
  259. matsiz = iwork[i__ + 1] - iwork[i__];
  260. }
  261. if (*icompq == 2) {
  /* The leaf result matrix goes straight into the diagonal block of q */
  /* that starts at (submat, submat); work[1] is the scratch argument. */
  262. _starpu_dsteqr_("I", &matsiz, &d__[submat], &e[submat], &q[submat +
  263. submat * q_dim1], ldq, &work[1], info);
  264. if (*info != 0) {
  265. goto L130;
  266. }
  267. } else {
  /* The leaf result matrix is stored in work at offset */
  /* iq - 1 + iwork[iqptr + curr] with leading dimension matsiz. The */
  /* scratch argument starts at work[1], the same offset as igivnm. */
  268. _starpu_dsteqr_("I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 +
  269. iwork[iqptr + curr]], &matsiz, &work[1], info);
  270. if (*info != 0) {
  271. goto L130;
  272. }
  273. if (*icompq == 1) {
  /* Columns submat .. of q (qsiz rows) are multiplied by the leaf */
  /* matrix just stored in work; the product is written to the */
  /* matching columns of qstore (scalars c_b23 = 1 and c_b24 = 0). */
  274. _starpu_dgemm_("N", "N", qsiz, &matsiz, &matsiz, &c_b23, &q[submat *
  275. q_dim1 + 1], ldq, &work[iq - 1 + iwork[iqptr + curr]],
  276. &matsiz, &c_b24, &qstore[submat * qstore_dim1 + 1],
  277. ldqs);
  278. }
  279. /* Computing 2nd power */
  /* The next leaf is stored matsiz*matsiz entries further on. */
  280. i__2 = matsiz;
  281. iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2;
  282. ++curr;
  283. }
  /* Fill the index array of this leaf with 1, 2, ..., matsiz. */
  284. k = 1;
  285. i__2 = iwork[i__ + 1];
  286. for (j = submat; j <= i__2; ++j) {
  287. iwork[indxq + j] = k;
  288. ++k;
  289. /* L60: */
  290. }
  291. /* L70: */
  292. }
  293. /* Successively merge eigensystems of adjacent submatrices */
  294. /* into eigensystem for the corresponding larger matrix. */
  295. /* while ( SUBPBS > 1 ) */
  296. curlvl = 1; /* merge level, incremented after each full pass */
  297. L80:
  298. if (subpbs > 1) {
  299. spm2 = subpbs - 2;
  300. i__1 = spm2;
  /* i__ advances by 2: subproblems i__+1 and i__+2 are merged together. */
  301. for (i__ = 0; i__ <= i__1; i__ += 2) {
  /* matsiz is the size of the merged problem and msd2 the size of its */
  /* first half; curprb numbers the merges within this level from 0. */
  302. if (i__ == 0) {
  303. submat = 1;
  304. matsiz = iwork[2];
  305. msd2 = iwork[1];
  306. curprb = 0;
  307. } else {
  308. submat = iwork[i__] + 1;
  309. matsiz = iwork[i__ + 2] - iwork[i__];
  310. msd2 = matsiz / 2;
  311. ++curprb;
  312. }
  313. /* Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) */
  314. /* into an eigensystem of size MATSIZ. */
  315. /* DLAED1 is used only for the full eigensystem of a tridiagonal */
  316. /* matrix. */
  317. /* DLAED7 handles the cases in which eigenvalues only or eigenvalues */
  318. /* and eigenvectors of a full symmetric matrix (which was reduced to */
  319. /* tridiagonal form) are desired. */
  /* In both calls e[submat + msd2 - 1] is the off-diagonal element */
  /* between the two halves, and iwork[subpbs + 1] is the start of the */
  /* integer workspace handed to the callee. */
  320. if (*icompq == 2) {
  321. _starpu_dlaed1_(&matsiz, &d__[submat], &q[submat + submat * q_dim1],
  322. ldq, &iwork[indxq + submat], &e[submat + msd2 - 1], &
  323. msd2, &work[1], &iwork[subpbs + 1], info);
  324. } else {
  325. _starpu_dlaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[
  326. submat], &qstore[submat * qstore_dim1 + 1], ldqs, &
  327. iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, &
  328. work[iq], &iwork[iqptr], &iwork[iprmpt], &iwork[iperm]
  329. , &iwork[igivpt], &iwork[igivcl], &work[igivnm], &
  330. work[iwrem], &iwork[subpbs + 1], info);
  331. }
  332. if (*info != 0) {
  333. goto L130;
  334. }
  /* Compact the end indices: the merged problem takes slot i__/2 + 1 */
  /* and ends where its second half ended. */
  335. iwork[i__ / 2 + 1] = iwork[i__ + 2];
  336. /* L90: */
  337. }
  338. subpbs /= 2;
  339. ++curlvl;
  340. goto L80;
  341. }
  342. /* end while */
  343. /* Re-merge the eigenvalues/vectors which were deflated at the final */
  344. /* merge step. */
  /* In every branch the values d__[iwork[indxq + i]] are gathered into */
  /* work[1..n] and then copied back over d__. */
  345. if (*icompq == 1) {
  346. i__1 = *n;
  347. for (i__ = 1; i__ <= i__1; ++i__) {
  348. j = iwork[indxq + i__];
  349. work[i__] = d__[j];
  /* Column j of qstore (qsiz entries) becomes column i__ of q. */
  350. _starpu_dcopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1
  351. + 1], &c__1);
  352. /* L100: */
  353. }
  354. _starpu_dcopy_(n, &work[1], &c__1, &d__[1], &c__1);
  355. } else if (*icompq == 2) {
  356. i__1 = *n;
  357. for (i__ = 1; i__ <= i__1; ++i__) {
  358. j = iwork[indxq + i__];
  359. work[i__] = d__[j];
  /* Column j of q is staged in work as column i__ of an n-by-n */
  /* matrix that begins at work[n + 1]. */
  360. _starpu_dcopy_(n, &q[j * q_dim1 + 1], &c__1, &work[*n * i__ + 1], &c__1);
  361. /* L110: */
  362. }
  363. _starpu_dcopy_(n, &work[1], &c__1, &d__[1], &c__1);
  /* Copy the staged n-by-n matrix back into q. */
  364. _starpu_dlacpy_("A", n, n, &work[*n + 1], n, &q[q_offset], ldq);
  365. } else {
  366. i__1 = *n;
  367. for (i__ = 1; i__ <= i__1; ++i__) {
  368. j = iwork[indxq + i__];
  369. work[i__] = d__[j];
  370. /* L120: */
  371. }
  372. _starpu_dcopy_(n, &work[1], &c__1, &d__[1], &c__1);
  373. }
  374. goto L140; /* normal exit: skip the error encoding below */
  375. L130:
  /* Failure exit shared by dsteqr, dlaed1 and dlaed7. Whatever info the */
  /* callee returned is replaced by first_row * (n + 1) + last_row of */
  /* the subproblem in progress, with first_row = submat and */
  /* last_row = submat + matsiz - 1. */
  376. *info = submat * (*n + 1) + submat + matsiz - 1;
  377. L140:
  378. return 0;
  379. /* End of DLAED0 */
  380. } /* _starpu_dlaed0_ */