dstedc.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489
  1. /* dstedc.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Table of constant values */
  14. static integer c__9 = 9;
  15. static integer c__0 = 0;
  16. static integer c__2 = 2;
  17. static doublereal c_b17 = 0.;
  18. static doublereal c_b18 = 1.;
  19. static integer c__1 = 1;
  20. /* Subroutine */ int _starpu_dstedc_(char *compz, integer *n, doublereal *d__,
  21. doublereal *e, doublereal *z__, integer *ldz, doublereal *work,
  22. integer *lwork, integer *iwork, integer *liwork, integer *info)
  23. {
  24. /* System generated locals */
  25. integer z_dim1, z_offset, i__1, i__2;
  26. doublereal d__1, d__2;
  27. /* Builtin functions */
  28. double log(doublereal);
  29. integer pow_ii(integer *, integer *);
  30. double sqrt(doublereal);
  31. /* Local variables */
  32. integer i__, j, k, m;
  33. doublereal p;
  34. integer ii, lgn;
  35. doublereal eps, tiny;
  36. extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *,
  37. integer *, doublereal *, doublereal *, integer *, doublereal *,
  38. integer *, doublereal *, doublereal *, integer *);
  39. extern logical _starpu_lsame_(char *, char *);
  40. extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *,
  41. doublereal *, integer *);
  42. integer lwmin;
  43. extern /* Subroutine */ int _starpu_dlaed0_(integer *, integer *, integer *,
  44. doublereal *, doublereal *, doublereal *, integer *, doublereal *,
  45. integer *, doublereal *, integer *, integer *);
  46. integer start;
  47. extern doublereal _starpu_dlamch_(char *);
  48. extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *,
  49. doublereal *, doublereal *, integer *, integer *, doublereal *,
  50. integer *, integer *), _starpu_dlacpy_(char *, integer *, integer
  51. *, doublereal *, integer *, doublereal *, integer *),
  52. _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *,
  53. doublereal *, integer *);
  54. extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *,
  55. integer *, integer *);
  56. extern /* Subroutine */ int _starpu_xerbla_(char *, integer *);
  57. integer finish;
  58. extern doublereal _starpu_dlanst_(char *, integer *, doublereal *, doublereal *);
  59. extern /* Subroutine */ int _starpu_dsterf_(integer *, doublereal *, doublereal *,
  60. integer *), _starpu_dlasrt_(char *, integer *, doublereal *, integer *);
  61. integer liwmin, icompz;
  62. extern /* Subroutine */ int _starpu_dsteqr_(char *, integer *, doublereal *,
  63. doublereal *, doublereal *, integer *, doublereal *, integer *);
  64. doublereal orgnrm;
  65. logical lquery;
  66. integer smlsiz, storez, strtrw;
  67. /* -- LAPACK driver routine (version 3.2) -- */
  68. /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  69. /* November 2006 */
  70. /* .. Scalar Arguments .. */
  71. /* .. */
  72. /* .. Array Arguments .. */
  73. /* .. */
  74. /* Purpose */
  75. /* ======= */
  76. /* DSTEDC computes all eigenvalues and, optionally, eigenvectors of a */
  77. /* symmetric tridiagonal matrix using the divide and conquer method. */
  78. /* The eigenvectors of a full or band real symmetric matrix can also be */
  79. /* found if DSYTRD or DSPTRD or DSBTRD has been used to reduce this */
  80. /* matrix to tridiagonal form. */
  81. /* This code makes very mild assumptions about floating point */
  82. /* arithmetic. It will work on machines with a guard digit in */
  83. /* add/subtract, or on those binary machines without guard digits */
  84. /* which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */
  85. /* It could conceivably fail on hexadecimal or decimal machines */
  86. /* without guard digits, but we know of none. See DLAED3 for details. */
  87. /* Arguments */
  88. /* ========= */
  89. /* COMPZ (input) CHARACTER*1 */
  90. /* = 'N': Compute eigenvalues only. */
  91. /* = 'I': Compute eigenvectors of tridiagonal matrix also. */
  92. /* = 'V': Compute eigenvectors of original dense symmetric */
  93. /* matrix also. On entry, Z contains the orthogonal */
  94. /* matrix used to reduce the original matrix to */
  95. /* tridiagonal form. */
  96. /* N (input) INTEGER */
  97. /* The dimension of the symmetric tridiagonal matrix. N >= 0. */
  98. /* D (input/output) DOUBLE PRECISION array, dimension (N) */
  99. /* On entry, the diagonal elements of the tridiagonal matrix. */
  100. /* On exit, if INFO = 0, the eigenvalues in ascending order. */
  101. /* E (input/output) DOUBLE PRECISION array, dimension (N-1) */
  102. /* On entry, the subdiagonal elements of the tridiagonal matrix. */
  103. /* On exit, E has been destroyed. */
  104. /* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) */
  105. /* On entry, if COMPZ = 'V', then Z contains the orthogonal */
  106. /* matrix used in the reduction to tridiagonal form. */
  107. /* On exit, if INFO = 0, then if COMPZ = 'V', Z contains the */
  108. /* orthonormal eigenvectors of the original symmetric matrix, */
  109. /* and if COMPZ = 'I', Z contains the orthonormal eigenvectors */
  110. /* of the symmetric tridiagonal matrix. */
  111. /* If COMPZ = 'N', then Z is not referenced. */
  112. /* LDZ (input) INTEGER */
  113. /* The leading dimension of the array Z. LDZ >= 1. */
  114. /* If eigenvectors are desired, then LDZ >= max(1,N). */
  115. /* WORK (workspace/output) DOUBLE PRECISION array, */
  116. /* dimension (LWORK) */
  117. /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
  118. /* LWORK (input) INTEGER */
  119. /* The dimension of the array WORK. */
  120. /* If COMPZ = 'N' or N <= 1 then LWORK must be at least 1. */
  121. /* If COMPZ = 'V' and N > 1 then LWORK must be at least */
  122. /* ( 1 + 3*N + 2*N*lg N + 3*N**2 ), */
  123. /* where lg( N ) = smallest integer k such */
  124. /* that 2**k >= N. */
  125. /* If COMPZ = 'I' and N > 1 then LWORK must be at least */
  126. /* ( 1 + 4*N + N**2 ). */
  127. /* Note that for COMPZ = 'I' or 'V', then if N is less than or */
  128. /* equal to the minimum divide size, usually 25, then LWORK need */
  129. /* only be max(1,2*(N-1)). */
  130. /* If LWORK = -1, then a workspace query is assumed; the routine */
  131. /* only calculates the optimal size of the WORK array, returns */
  132. /* this value as the first entry of the WORK array, and no error */
  133. /* message related to LWORK is issued by XERBLA. */
  134. /* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */
  135. /* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */
  136. /* LIWORK (input) INTEGER */
  137. /* The dimension of the array IWORK. */
  138. /* If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1. */
  139. /* If COMPZ = 'V' and N > 1 then LIWORK must be at least */
  140. /* ( 6 + 6*N + 5*N*lg N ). */
  141. /* If COMPZ = 'I' and N > 1 then LIWORK must be at least */
  142. /* ( 3 + 5*N ). */
  143. /* Note that for COMPZ = 'I' or 'V', then if N is less than or */
  144. /* equal to the minimum divide size, usually 25, then LIWORK */
  145. /* need only be 1. */
  146. /* If LIWORK = -1, then a workspace query is assumed; the */
  147. /* routine only calculates the optimal size of the IWORK array, */
  148. /* returns this value as the first entry of the IWORK array, and */
  149. /* no error message related to LIWORK is issued by XERBLA. */
  150. /* INFO (output) INTEGER */
  151. /* = 0: successful exit. */
  152. /* < 0: if INFO = -i, the i-th argument had an illegal value. */
  153. /* > 0: The algorithm failed to compute an eigenvalue while */
  154. /* working on the submatrix lying in rows and columns */
  155. /* INFO/(N+1) through mod(INFO,N+1). */
  156. /* Further Details */
  157. /* =============== */
  158. /* Based on contributions by */
  159. /* Jeff Rutter, Computer Science Division, University of California */
  160. /* at Berkeley, USA */
  161. /* Modified by Francoise Tisseur, University of Tennessee. */
  162. /* ===================================================================== */
  163. /* .. Parameters .. */
  164. /* .. */
  165. /* .. Local Scalars .. */
  166. /* .. */
  167. /* .. External Functions .. */
  168. /* .. */
  169. /* .. External Subroutines .. */
  170. /* .. */
  171. /* .. Intrinsic Functions .. */
  172. /* .. */
  173. /* .. Executable Statements .. */
  174. /* Test the input parameters. */
  175. /* Parameter adjustments */
  176. --d__;
  177. --e;
  178. z_dim1 = *ldz;
  179. z_offset = 1 + z_dim1;
  180. z__ -= z_offset;
  181. --work;
  182. --iwork;
  183. /* Function Body */
  184. *info = 0;
  185. lquery = *lwork == -1 || *liwork == -1;
  186. if (_starpu_lsame_(compz, "N")) {
  187. icompz = 0;
  188. } else if (_starpu_lsame_(compz, "V")) {
  189. icompz = 1;
  190. } else if (_starpu_lsame_(compz, "I")) {
  191. icompz = 2;
  192. } else {
  193. icompz = -1;
  194. }
  195. if (icompz < 0) {
  196. *info = -1;
  197. } else if (*n < 0) {
  198. *info = -2;
  199. } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) {
  200. *info = -6;
  201. }
  202. if (*info == 0) {
  203. /* Compute the workspace requirements */
  204. smlsiz = _starpu_ilaenv_(&c__9, "DSTEDC", " ", &c__0, &c__0, &c__0, &c__0);
  205. if (*n <= 1 || icompz == 0) {
  206. liwmin = 1;
  207. lwmin = 1;
  208. } else if (*n <= smlsiz) {
  209. liwmin = 1;
  210. lwmin = *n - 1 << 1;
  211. } else {
  212. lgn = (integer) (log((doublereal) (*n)) / log(2.));
  213. if (pow_ii(&c__2, &lgn) < *n) {
  214. ++lgn;
  215. }
  216. if (pow_ii(&c__2, &lgn) < *n) {
  217. ++lgn;
  218. }
  219. if (icompz == 1) {
  220. /* Computing 2nd power */
  221. i__1 = *n;
  222. lwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3;
  223. liwmin = *n * 6 + 6 + *n * 5 * lgn;
  224. } else if (icompz == 2) {
  225. /* Computing 2nd power */
  226. i__1 = *n;
  227. lwmin = (*n << 2) + 1 + i__1 * i__1;
  228. liwmin = *n * 5 + 3;
  229. }
  230. }
  231. work[1] = (doublereal) lwmin;
  232. iwork[1] = liwmin;
  233. if (*lwork < lwmin && ! lquery) {
  234. *info = -8;
  235. } else if (*liwork < liwmin && ! lquery) {
  236. *info = -10;
  237. }
  238. }
  239. if (*info != 0) {
  240. i__1 = -(*info);
  241. _starpu_xerbla_("DSTEDC", &i__1);
  242. return 0;
  243. } else if (lquery) {
  244. return 0;
  245. }
  246. /* Quick return if possible */
  247. if (*n == 0) {
  248. return 0;
  249. }
  250. if (*n == 1) {
  251. if (icompz != 0) {
  252. z__[z_dim1 + 1] = 1.;
  253. }
  254. return 0;
  255. }
  256. /* If the following conditional clause is removed, then the routine */
  257. /* will use the Divide and Conquer routine to compute only the */
  258. /* eigenvalues, which requires (3N + 3N**2) real workspace and */
  259. /* (2 + 5N + 2N lg(N)) integer workspace. */
  260. /* Since on many architectures DSTERF is much faster than any other */
  261. /* algorithm for finding eigenvalues only, it is used here */
  262. /* as the default. If the conditional clause is removed, then */
  263. /* information on the size of workspace needs to be changed. */
  264. /* If COMPZ = 'N', use DSTERF to compute the eigenvalues. */
  265. if (icompz == 0) {
  266. _starpu_dsterf_(n, &d__[1], &e[1], info);
  267. goto L50;
  268. }
  269. /* If N is smaller than the minimum divide size (SMLSIZ+1), then */
  270. /* solve the problem with another solver. */
  271. if (*n <= smlsiz) {
  272. _starpu_dsteqr_(compz, n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], info);
  273. } else {
  274. /* If COMPZ = 'V', the Z matrix must be stored elsewhere for later */
  275. /* use. */
  276. if (icompz == 1) {
  277. storez = *n * *n + 1;
  278. } else {
  279. storez = 1;
  280. }
  281. if (icompz == 2) {
  282. _starpu_dlaset_("Full", n, n, &c_b17, &c_b18, &z__[z_offset], ldz);
  283. }
  284. /* Scale. */
  285. orgnrm = _starpu_dlanst_("M", n, &d__[1], &e[1]);
  286. if (orgnrm == 0.) {
  287. goto L50;
  288. }
  289. eps = _starpu_dlamch_("Epsilon");
  290. start = 1;
  291. /* while ( START <= N ) */
  292. L10:
  293. if (start <= *n) {
  294. /* Let FINISH be the position of the next subdiagonal entry */
  295. /* such that E( FINISH ) <= TINY or FINISH = N if no such */
  296. /* subdiagonal exists. The matrix identified by the elements */
  297. /* between START and FINISH constitutes an independent */
  298. /* sub-problem. */
  299. finish = start;
  300. L20:
  301. if (finish < *n) {
  302. tiny = eps * sqrt((d__1 = d__[finish], abs(d__1))) * sqrt((
  303. d__2 = d__[finish + 1], abs(d__2)));
  304. if ((d__1 = e[finish], abs(d__1)) > tiny) {
  305. ++finish;
  306. goto L20;
  307. }
  308. }
  309. /* (Sub) Problem determined. Compute its size and solve it. */
  310. m = finish - start + 1;
  311. if (m == 1) {
  312. start = finish + 1;
  313. goto L10;
  314. }
  315. if (m > smlsiz) {
  316. /* Scale. */
  317. orgnrm = _starpu_dlanst_("M", &m, &d__[start], &e[start]);
  318. _starpu_dlascl_("G", &c__0, &c__0, &orgnrm, &c_b18, &m, &c__1, &d__[
  319. start], &m, info);
  320. i__1 = m - 1;
  321. i__2 = m - 1;
  322. _starpu_dlascl_("G", &c__0, &c__0, &orgnrm, &c_b18, &i__1, &c__1, &e[
  323. start], &i__2, info);
  324. if (icompz == 1) {
  325. strtrw = 1;
  326. } else {
  327. strtrw = start;
  328. }
  329. _starpu_dlaed0_(&icompz, n, &m, &d__[start], &e[start], &z__[strtrw +
  330. start * z_dim1], ldz, &work[1], n, &work[storez], &
  331. iwork[1], info);
  332. if (*info != 0) {
  333. *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info %
  334. (m + 1) + start - 1;
  335. goto L50;
  336. }
  337. /* Scale back. */
  338. _starpu_dlascl_("G", &c__0, &c__0, &c_b18, &orgnrm, &m, &c__1, &d__[
  339. start], &m, info);
  340. } else {
  341. if (icompz == 1) {
  342. /* Since QR won't update a Z matrix which is larger than */
  343. /* the length of D, we must solve the sub-problem in a */
  344. /* workspace and then multiply back into Z. */
  345. _starpu_dsteqr_("I", &m, &d__[start], &e[start], &work[1], &m, &
  346. work[m * m + 1], info);
  347. _starpu_dlacpy_("A", n, &m, &z__[start * z_dim1 + 1], ldz, &work[
  348. storez], n);
  349. _starpu_dgemm_("N", "N", n, &m, &m, &c_b18, &work[storez], n, &
  350. work[1], &m, &c_b17, &z__[start * z_dim1 + 1],
  351. ldz);
  352. } else if (icompz == 2) {
  353. _starpu_dsteqr_("I", &m, &d__[start], &e[start], &z__[start +
  354. start * z_dim1], ldz, &work[1], info);
  355. } else {
  356. _starpu_dsterf_(&m, &d__[start], &e[start], info);
  357. }
  358. if (*info != 0) {
  359. *info = start * (*n + 1) + finish;
  360. goto L50;
  361. }
  362. }
  363. start = finish + 1;
  364. goto L10;
  365. }
  366. /* endwhile */
  367. /* If the problem split any number of times, then the eigenvalues */
  368. /* will not be properly ordered. Here we permute the eigenvalues */
  369. /* (and the associated eigenvectors) into ascending order. */
  370. if (m != *n) {
  371. if (icompz == 0) {
  372. /* Use Quick Sort */
  373. _starpu_dlasrt_("I", n, &d__[1], info);
  374. } else {
  375. /* Use Selection Sort to minimize swaps of eigenvectors */
  376. i__1 = *n;
  377. for (ii = 2; ii <= i__1; ++ii) {
  378. i__ = ii - 1;
  379. k = i__;
  380. p = d__[i__];
  381. i__2 = *n;
  382. for (j = ii; j <= i__2; ++j) {
  383. if (d__[j] < p) {
  384. k = j;
  385. p = d__[j];
  386. }
  387. /* L30: */
  388. }
  389. if (k != i__) {
  390. d__[k] = d__[i__];
  391. d__[i__] = p;
  392. _starpu_dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k *
  393. z_dim1 + 1], &c__1);
  394. }
  395. /* L40: */
  396. }
  397. }
  398. }
  399. }
  400. L50:
  401. work[1] = (doublereal) lwmin;
  402. iwork[1] = liwmin;
  403. return 0;
  404. /* End of DSTEDC */
  405. } /* _starpu_dstedc_ */