/* dstein.f -- translated by f2c (version 20061008).
   You must link the resulting object file with libf2c:
	on Microsoft Windows system, link with libf2c.lib;
	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
	or, if you install libf2c.a in a standard place, with -lf2c -lm
	-- in that order, at the end of the command line, as in
		cc *.o -lf2c -lm
	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,

		http://www.netlib.org/f2c/libf2c.zip
*/
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Table of constant values */
  14. static integer c__2 = 2;
  15. static integer c__1 = 1;
  16. static integer c_n1 = -1;
  17. /* Subroutine */ int _starpu_dstein_(integer *n, doublereal *d__, doublereal *e,
  18. integer *m, doublereal *w, integer *iblock, integer *isplit,
  19. doublereal *z__, integer *ldz, doublereal *work, integer *iwork,
  20. integer *ifail, integer *info)
  21. {
  22. /* System generated locals */
  23. integer z_dim1, z_offset, i__1, i__2, i__3;
  24. doublereal d__1, d__2, d__3, d__4, d__5;
  25. /* Builtin functions */
  26. double sqrt(doublereal);
  27. /* Local variables */
  28. integer i__, j, b1, j1, bn;
  29. doublereal xj, scl, eps, sep, nrm, tol;
  30. integer its;
  31. doublereal xjm, ztr, eps1;
  32. integer jblk, nblk;
  33. extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *,
  34. integer *);
  35. integer jmax;
  36. extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *);
  37. extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *,
  38. integer *);
  39. integer iseed[4], gpind, iinfo;
  40. extern doublereal _starpu_dasum_(integer *, doublereal *, integer *);
  41. extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *,
  42. doublereal *, integer *), _starpu_daxpy_(integer *, doublereal *,
  43. doublereal *, integer *, doublereal *, integer *);
  44. doublereal ortol;
  45. integer indrv1, indrv2, indrv3, indrv4, indrv5;
  46. extern doublereal _starpu_dlamch_(char *);
  47. extern /* Subroutine */ int _starpu_dlagtf_(integer *, doublereal *, doublereal *,
  48. doublereal *, doublereal *, doublereal *, doublereal *, integer *
  49. , integer *);
  50. extern integer _starpu_idamax_(integer *, doublereal *, integer *);
  51. extern /* Subroutine */ int _starpu_xerbla_(char *, integer *), _starpu_dlagts_(
  52. integer *, integer *, doublereal *, doublereal *, doublereal *,
  53. doublereal *, integer *, doublereal *, doublereal *, integer *);
  54. integer nrmchk;
  55. extern /* Subroutine */ int _starpu_dlarnv_(integer *, integer *, integer *,
  56. doublereal *);
  57. integer blksiz;
  58. doublereal onenrm, dtpcrt, pertol;
  59. /* -- LAPACK routine (version 3.2) -- */
  60. /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  61. /* November 2006 */
  62. /* .. Scalar Arguments .. */
  63. /* .. */
  64. /* .. Array Arguments .. */
  65. /* .. */
  66. /* Purpose */
  67. /* ======= */
  68. /* DSTEIN computes the eigenvectors of a real symmetric tridiagonal */
  69. /* matrix T corresponding to specified eigenvalues, using inverse */
  70. /* iteration. */
  71. /* The maximum number of iterations allowed for each eigenvector is */
  72. /* specified by an internal parameter MAXITS (currently set to 5). */
  73. /* Arguments */
  74. /* ========= */
  75. /* N (input) INTEGER */
  76. /* The order of the matrix. N >= 0. */
  77. /* D (input) DOUBLE PRECISION array, dimension (N) */
  78. /* The n diagonal elements of the tridiagonal matrix T. */
  79. /* E (input) DOUBLE PRECISION array, dimension (N-1) */
  80. /* The (n-1) subdiagonal elements of the tridiagonal matrix */
  81. /* T, in elements 1 to N-1. */
  82. /* M (input) INTEGER */
  83. /* The number of eigenvectors to be found. 0 <= M <= N. */
  84. /* W (input) DOUBLE PRECISION array, dimension (N) */
  85. /* The first M elements of W contain the eigenvalues for */
  86. /* which eigenvectors are to be computed. The eigenvalues */
  87. /* should be grouped by split-off block and ordered from */
  88. /* smallest to largest within the block. ( The output array */
  89. /* W from DSTEBZ with ORDER = 'B' is expected here. ) */
  90. /* IBLOCK (input) INTEGER array, dimension (N) */
  91. /* The submatrix indices associated with the corresponding */
  92. /* eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to */
  93. /* the first submatrix from the top, =2 if W(i) belongs to */
  94. /* the second submatrix, etc. ( The output array IBLOCK */
  95. /* from DSTEBZ is expected here. ) */
  96. /* ISPLIT (input) INTEGER array, dimension (N) */
  97. /* The splitting points, at which T breaks up into submatrices. */
  98. /* The first submatrix consists of rows/columns 1 to */
  99. /* ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 */
  100. /* through ISPLIT( 2 ), etc. */
  101. /* ( The output array ISPLIT from DSTEBZ is expected here. ) */
  102. /* Z (output) DOUBLE PRECISION array, dimension (LDZ, M) */
  103. /* The computed eigenvectors. The eigenvector associated */
  104. /* with the eigenvalue W(i) is stored in the i-th column of */
  105. /* Z. Any vector which fails to converge is set to its current */
  106. /* iterate after MAXITS iterations. */
  107. /* LDZ (input) INTEGER */
  108. /* The leading dimension of the array Z. LDZ >= max(1,N). */
  109. /* WORK (workspace) DOUBLE PRECISION array, dimension (5*N) */
  110. /* IWORK (workspace) INTEGER array, dimension (N) */
  111. /* IFAIL (output) INTEGER array, dimension (M) */
  112. /* On normal exit, all elements of IFAIL are zero. */
  113. /* If one or more eigenvectors fail to converge after */
  114. /* MAXITS iterations, then their indices are stored in */
  115. /* array IFAIL. */
  116. /* INFO (output) INTEGER */
  117. /* = 0: successful exit. */
  118. /* < 0: if INFO = -i, the i-th argument had an illegal value */
  119. /* > 0: if INFO = i, then i eigenvectors failed to converge */
  120. /* in MAXITS iterations. Their indices are stored in */
  121. /* array IFAIL. */
  122. /* Internal Parameters */
  123. /* =================== */
  124. /* MAXITS INTEGER, default = 5 */
  125. /* The maximum number of iterations performed. */
  126. /* EXTRA INTEGER, default = 2 */
  127. /* The number of iterations performed after norm growth */
  128. /* criterion is satisfied, should be at least 1. */
  129. /* ===================================================================== */
  130. /* .. Parameters .. */
  131. /* .. */
  132. /* .. Local Scalars .. */
  133. /* .. */
  134. /* .. Local Arrays .. */
  135. /* .. */
  136. /* .. External Functions .. */
  137. /* .. */
  138. /* .. External Subroutines .. */
  139. /* .. */
  140. /* .. Intrinsic Functions .. */
  141. /* .. */
  142. /* .. Executable Statements .. */
  143. /* Test the input parameters. */
  144. /* Parameter adjustments */
  145. --d__;
  146. --e;
  147. --w;
  148. --iblock;
  149. --isplit;
  150. z_dim1 = *ldz;
  151. z_offset = 1 + z_dim1;
  152. z__ -= z_offset;
  153. --work;
  154. --iwork;
  155. --ifail;
  156. /* Function Body */
  157. *info = 0;
  158. i__1 = *m;
  159. for (i__ = 1; i__ <= i__1; ++i__) {
  160. ifail[i__] = 0;
  161. /* L10: */
  162. }
  163. if (*n < 0) {
  164. *info = -1;
  165. } else if (*m < 0 || *m > *n) {
  166. *info = -4;
  167. } else if (*ldz < max(1,*n)) {
  168. *info = -9;
  169. } else {
  170. i__1 = *m;
  171. for (j = 2; j <= i__1; ++j) {
  172. if (iblock[j] < iblock[j - 1]) {
  173. *info = -6;
  174. goto L30;
  175. }
  176. if (iblock[j] == iblock[j - 1] && w[j] < w[j - 1]) {
  177. *info = -5;
  178. goto L30;
  179. }
  180. /* L20: */
  181. }
  182. L30:
  183. ;
  184. }
  185. if (*info != 0) {
  186. i__1 = -(*info);
  187. _starpu_xerbla_("DSTEIN", &i__1);
  188. return 0;
  189. }
  190. /* Quick return if possible */
  191. if (*n == 0 || *m == 0) {
  192. return 0;
  193. } else if (*n == 1) {
  194. z__[z_dim1 + 1] = 1.;
  195. return 0;
  196. }
  197. /* Get machine constants. */
  198. eps = _starpu_dlamch_("Precision");
  199. /* Initialize seed for random number generator DLARNV. */
  200. for (i__ = 1; i__ <= 4; ++i__) {
  201. iseed[i__ - 1] = 1;
  202. /* L40: */
  203. }
  204. /* Initialize pointers. */
  205. indrv1 = 0;
  206. indrv2 = indrv1 + *n;
  207. indrv3 = indrv2 + *n;
  208. indrv4 = indrv3 + *n;
  209. indrv5 = indrv4 + *n;
  210. /* Compute eigenvectors of matrix blocks. */
  211. j1 = 1;
  212. i__1 = iblock[*m];
  213. for (nblk = 1; nblk <= i__1; ++nblk) {
  214. /* Find starting and ending indices of block nblk. */
  215. if (nblk == 1) {
  216. b1 = 1;
  217. } else {
  218. b1 = isplit[nblk - 1] + 1;
  219. }
  220. bn = isplit[nblk];
  221. blksiz = bn - b1 + 1;
  222. if (blksiz == 1) {
  223. goto L60;
  224. }
  225. gpind = b1;
  226. /* Compute reorthogonalization criterion and stopping criterion. */
  227. onenrm = (d__1 = d__[b1], abs(d__1)) + (d__2 = e[b1], abs(d__2));
  228. /* Computing MAX */
  229. d__3 = onenrm, d__4 = (d__1 = d__[bn], abs(d__1)) + (d__2 = e[bn - 1],
  230. abs(d__2));
  231. onenrm = max(d__3,d__4);
  232. i__2 = bn - 1;
  233. for (i__ = b1 + 1; i__ <= i__2; ++i__) {
  234. /* Computing MAX */
  235. d__4 = onenrm, d__5 = (d__1 = d__[i__], abs(d__1)) + (d__2 = e[
  236. i__ - 1], abs(d__2)) + (d__3 = e[i__], abs(d__3));
  237. onenrm = max(d__4,d__5);
  238. /* L50: */
  239. }
  240. ortol = onenrm * .001;
  241. dtpcrt = sqrt(.1 / blksiz);
  242. /* Loop through eigenvalues of block nblk. */
  243. L60:
  244. jblk = 0;
  245. i__2 = *m;
  246. for (j = j1; j <= i__2; ++j) {
  247. if (iblock[j] != nblk) {
  248. j1 = j;
  249. goto L160;
  250. }
  251. ++jblk;
  252. xj = w[j];
  253. /* Skip all the work if the block size is one. */
  254. if (blksiz == 1) {
  255. work[indrv1 + 1] = 1.;
  256. goto L120;
  257. }
  258. /* If eigenvalues j and j-1 are too close, add a relatively */
  259. /* small perturbation. */
  260. if (jblk > 1) {
  261. eps1 = (d__1 = eps * xj, abs(d__1));
  262. pertol = eps1 * 10.;
  263. sep = xj - xjm;
  264. if (sep < pertol) {
  265. xj = xjm + pertol;
  266. }
  267. }
  268. its = 0;
  269. nrmchk = 0;
  270. /* Get random starting vector. */
  271. _starpu_dlarnv_(&c__2, iseed, &blksiz, &work[indrv1 + 1]);
  272. /* Copy the matrix T so it won't be destroyed in factorization. */
  273. _starpu_dcopy_(&blksiz, &d__[b1], &c__1, &work[indrv4 + 1], &c__1);
  274. i__3 = blksiz - 1;
  275. _starpu_dcopy_(&i__3, &e[b1], &c__1, &work[indrv2 + 2], &c__1);
  276. i__3 = blksiz - 1;
  277. _starpu_dcopy_(&i__3, &e[b1], &c__1, &work[indrv3 + 1], &c__1);
  278. /* Compute LU factors with partial pivoting ( PT = LU ) */
  279. tol = 0.;
  280. _starpu_dlagtf_(&blksiz, &work[indrv4 + 1], &xj, &work[indrv2 + 2], &work[
  281. indrv3 + 1], &tol, &work[indrv5 + 1], &iwork[1], &iinfo);
  282. /* Update iteration count. */
  283. L70:
  284. ++its;
  285. if (its > 5) {
  286. goto L100;
  287. }
  288. /* Normalize and scale the righthand side vector Pb. */
  289. /* Computing MAX */
  290. d__2 = eps, d__3 = (d__1 = work[indrv4 + blksiz], abs(d__1));
  291. scl = blksiz * onenrm * max(d__2,d__3) / _starpu_dasum_(&blksiz, &work[
  292. indrv1 + 1], &c__1);
  293. _starpu_dscal_(&blksiz, &scl, &work[indrv1 + 1], &c__1);
  294. /* Solve the system LU = Pb. */
  295. _starpu_dlagts_(&c_n1, &blksiz, &work[indrv4 + 1], &work[indrv2 + 2], &
  296. work[indrv3 + 1], &work[indrv5 + 1], &iwork[1], &work[
  297. indrv1 + 1], &tol, &iinfo);
  298. /* Reorthogonalize by modified Gram-Schmidt if eigenvalues are */
  299. /* close enough. */
  300. if (jblk == 1) {
  301. goto L90;
  302. }
  303. if ((d__1 = xj - xjm, abs(d__1)) > ortol) {
  304. gpind = j;
  305. }
  306. if (gpind != j) {
  307. i__3 = j - 1;
  308. for (i__ = gpind; i__ <= i__3; ++i__) {
  309. ztr = -_starpu_ddot_(&blksiz, &work[indrv1 + 1], &c__1, &z__[b1 +
  310. i__ * z_dim1], &c__1);
  311. _starpu_daxpy_(&blksiz, &ztr, &z__[b1 + i__ * z_dim1], &c__1, &
  312. work[indrv1 + 1], &c__1);
  313. /* L80: */
  314. }
  315. }
  316. /* Check the infinity norm of the iterate. */
  317. L90:
  318. jmax = _starpu_idamax_(&blksiz, &work[indrv1 + 1], &c__1);
  319. nrm = (d__1 = work[indrv1 + jmax], abs(d__1));
  320. /* Continue for additional iterations after norm reaches */
  321. /* stopping criterion. */
  322. if (nrm < dtpcrt) {
  323. goto L70;
  324. }
  325. ++nrmchk;
  326. if (nrmchk < 3) {
  327. goto L70;
  328. }
  329. goto L110;
  330. /* If stopping criterion was not satisfied, update info and */
  331. /* store eigenvector number in array ifail. */
  332. L100:
  333. ++(*info);
  334. ifail[*info] = j;
  335. /* Accept iterate as jth eigenvector. */
  336. L110:
  337. scl = 1. / _starpu_dnrm2_(&blksiz, &work[indrv1 + 1], &c__1);
  338. jmax = _starpu_idamax_(&blksiz, &work[indrv1 + 1], &c__1);
  339. if (work[indrv1 + jmax] < 0.) {
  340. scl = -scl;
  341. }
  342. _starpu_dscal_(&blksiz, &scl, &work[indrv1 + 1], &c__1);
  343. L120:
  344. i__3 = *n;
  345. for (i__ = 1; i__ <= i__3; ++i__) {
  346. z__[i__ + j * z_dim1] = 0.;
  347. /* L130: */
  348. }
  349. i__3 = blksiz;
  350. for (i__ = 1; i__ <= i__3; ++i__) {
  351. z__[b1 + i__ - 1 + j * z_dim1] = work[indrv1 + i__];
  352. /* L140: */
  353. }
  354. /* Save the shift to check eigenvalue spacing at next */
  355. /* iteration. */
  356. xjm = xj;
  357. /* L150: */
  358. }
  359. L160:
  360. ;
  361. }
  362. return 0;
  363. /* End of DSTEIN */
  364. } /* _starpu_dstein_ */