dhseqr.c 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488
  1. /* dhseqr.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Table of constant values */
  14. static doublereal c_b11 = 0.;
  15. static doublereal c_b12 = 1.;
  16. static integer c__12 = 12;
  17. static integer c__2 = 2;
  18. static integer c__49 = 49;
  19. /* Subroutine */ int _starpu_dhseqr_(char *job, char *compz, integer *n, integer *ilo,
  20. integer *ihi, doublereal *h__, integer *ldh, doublereal *wr,
  21. doublereal *wi, doublereal *z__, integer *ldz, doublereal *work,
  22. integer *lwork, integer *info)
  23. {
  24. /* System generated locals */
  25. address a__1[2];
  26. integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2[2], i__3;
  27. doublereal d__1;
  28. char ch__1[2];
  29. /* Builtin functions */
  30. /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
  31. /* Local variables */
  32. integer i__;
  33. doublereal hl[2401] /* was [49][49] */;
  34. integer kbot, nmin;
  35. extern logical _starpu_lsame_(char *, char *);
  36. logical initz;
  37. doublereal workl[49];
  38. logical wantt, wantz;
  39. extern /* Subroutine */ int _starpu_dlaqr0_(logical *, logical *, integer *,
  40. integer *, integer *, doublereal *, integer *, doublereal *,
  41. doublereal *, integer *, integer *, doublereal *, integer *,
  42. doublereal *, integer *, integer *), _starpu_dlahqr_(logical *, logical *,
  43. integer *, integer *, integer *, doublereal *, integer *,
  44. doublereal *, doublereal *, integer *, integer *, doublereal *,
  45. integer *, integer *), _starpu_dlacpy_(char *, integer *, integer *,
  46. doublereal *, integer *, doublereal *, integer *),
  47. _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *,
  48. doublereal *, integer *);
  49. extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *,
  50. integer *, integer *);
  51. extern /* Subroutine */ int _starpu_xerbla_(char *, integer *);
  52. logical lquery;
  53. /* -- LAPACK driver routine (version 3.2) -- */
  54. /* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. */
  55. /* November 2006 */
  56. /* .. Scalar Arguments .. */
  57. /* .. */
  58. /* .. Array Arguments .. */
  59. /* .. */
  60. /* Purpose */
  61. /* ======= */
  62. /* DHSEQR computes the eigenvalues of a Hessenberg matrix H */
  63. /* and, optionally, the matrices T and Z from the Schur decomposition */
  64. /* H = Z T Z**T, where T is an upper quasi-triangular matrix (the */
  65. /* Schur form), and Z is the orthogonal matrix of Schur vectors. */
  66. /* Optionally Z may be postmultiplied into an input orthogonal */
  67. /* matrix Q so that this routine can give the Schur factorization */
  68. /* of a matrix A which has been reduced to the Hessenberg form H */
  69. /* by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. */
  70. /* Arguments */
  71. /* ========= */
  72. /* JOB (input) CHARACTER*1 */
  73. /* = 'E': compute eigenvalues only; */
  74. /* = 'S': compute eigenvalues and the Schur form T. */
  75. /* COMPZ (input) CHARACTER*1 */
  76. /* = 'N': no Schur vectors are computed; */
  77. /* = 'I': Z is initialized to the unit matrix and the matrix Z */
  78. /* of Schur vectors of H is returned; */
  79. /* = 'V': Z must contain an orthogonal matrix Q on entry, and */
  80. /* the product Q*Z is returned. */
  81. /* N (input) INTEGER */
  82. /* The order of the matrix H. N .GE. 0. */
  83. /* ILO (input) INTEGER */
  84. /* IHI (input) INTEGER */
  85. /* It is assumed that H is already upper triangular in rows */
  86. /* and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */
  87. /* set by a previous call to DGEBAL, and then passed to DGEHRD */
  88. /* when the matrix output by DGEBAL is reduced to Hessenberg */
  89. /* form. Otherwise ILO and IHI should be set to 1 and N */
  90. /* respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. */
  91. /* If N = 0, then ILO = 1 and IHI = 0. */
  92. /* H (input/output) DOUBLE PRECISION array, dimension (LDH,N) */
  93. /* On entry, the upper Hessenberg matrix H. */
  94. /* On exit, if INFO = 0 and JOB = 'S', then H contains the */
  95. /* upper quasi-triangular matrix T from the Schur decomposition */
  96. /* (the Schur form); 2-by-2 diagonal blocks (corresponding to */
  97. /* complex conjugate pairs of eigenvalues) are returned in */
  98. /* standard form, with H(i,i) = H(i+1,i+1) and */
  99. /* H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and JOB = 'E', the */
  100. /* contents of H are unspecified on exit. (The output value of */
  101. /* H when INFO.GT.0 is given under the description of INFO */
  102. /* below.) */
  103. /* Unlike earlier versions of DHSEQR, this subroutine may */
  104. /* explicitly H(i,j) = 0 for i.GT.j and j = 1, 2, ... ILO-1 */
  105. /* or j = IHI+1, IHI+2, ... N. */
  106. /* LDH (input) INTEGER */
  107. /* The leading dimension of the array H. LDH .GE. max(1,N). */
  108. /* WR (output) DOUBLE PRECISION array, dimension (N) */
  109. /* WI (output) DOUBLE PRECISION array, dimension (N) */
  110. /* The real and imaginary parts, respectively, of the computed */
  111. /* eigenvalues. If two eigenvalues are computed as a complex */
  112. /* conjugate pair, they are stored in consecutive elements of */
  113. /* WR and WI, say the i-th and (i+1)th, with WI(i) .GT. 0 and */
  114. /* WI(i+1) .LT. 0. If JOB = 'S', the eigenvalues are stored in */
  115. /* the same order as on the diagonal of the Schur form returned */
  116. /* in H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 */
  117. /* diagonal block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and */
  118. /* WI(i+1) = -WI(i). */
  119. /* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) */
  120. /* If COMPZ = 'N', Z is not referenced. */
  121. /* If COMPZ = 'I', on entry Z need not be set and on exit, */
  122. /* if INFO = 0, Z contains the orthogonal matrix Z of the Schur */
  123. /* vectors of H. If COMPZ = 'V', on entry Z must contain an */
  124. /* N-by-N matrix Q, which is assumed to be equal to the unit */
  125. /* matrix except for the submatrix Z(ILO:IHI,ILO:IHI). On exit, */
  126. /* if INFO = 0, Z contains Q*Z. */
  127. /* Normally Q is the orthogonal matrix generated by DORGHR */
  128. /* after the call to DGEHRD which formed the Hessenberg matrix */
  129. /* H. (The output value of Z when INFO.GT.0 is given under */
  130. /* the description of INFO below.) */
  131. /* LDZ (input) INTEGER */
  132. /* The leading dimension of the array Z. if COMPZ = 'I' or */
  133. /* COMPZ = 'V', then LDZ.GE.MAX(1,N). Otherwize, LDZ.GE.1. */
  134. /* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) */
  135. /* On exit, if INFO = 0, WORK(1) returns an estimate of */
  136. /* the optimal value for LWORK. */
  137. /* LWORK (input) INTEGER */
  138. /* The dimension of the array WORK. LWORK .GE. max(1,N) */
  139. /* is sufficient and delivers very good and sometimes */
  140. /* optimal performance. However, LWORK as large as 11*N */
  141. /* may be required for optimal performance. A workspace */
  142. /* query is recommended to determine the optimal workspace */
  143. /* size. */
  144. /* If LWORK = -1, then DHSEQR does a workspace query. */
  145. /* In this case, DHSEQR checks the input parameters and */
  146. /* estimates the optimal workspace size for the given */
  147. /* values of N, ILO and IHI. The estimate is returned */
  148. /* in WORK(1). No error message related to LWORK is */
  149. /* issued by XERBLA. Neither H nor Z are accessed. */
  150. /* INFO (output) INTEGER */
  151. /* = 0: successful exit */
  152. /* .LT. 0: if INFO = -i, the i-th argument had an illegal */
  153. /* value */
  154. /* .GT. 0: if INFO = i, DHSEQR failed to compute all of */
  155. /* the eigenvalues. Elements 1:ilo-1 and i+1:n of WR */
  156. /* and WI contain those eigenvalues which have been */
  157. /* successfully computed. (Failures are rare.) */
  158. /* If INFO .GT. 0 and JOB = 'E', then on exit, the */
  159. /* remaining unconverged eigenvalues are the eigen- */
  160. /* values of the upper Hessenberg matrix rows and */
  161. /* columns ILO through INFO of the final, output */
  162. /* value of H. */
  163. /* If INFO .GT. 0 and JOB = 'S', then on exit */
  164. /* (*) (initial value of H)*U = U*(final value of H) */
  165. /* where U is an orthogonal matrix. The final */
  166. /* value of H is upper Hessenberg and quasi-triangular */
  167. /* in rows and columns INFO+1 through IHI. */
  168. /* If INFO .GT. 0 and COMPZ = 'V', then on exit */
  169. /* (final value of Z) = (initial value of Z)*U */
  170. /* where U is the orthogonal matrix in (*) (regard- */
  171. /* less of the value of JOB.) */
  172. /* If INFO .GT. 0 and COMPZ = 'I', then on exit */
  173. /* (final value of Z) = U */
  174. /* where U is the orthogonal matrix in (*) (regard- */
  175. /* less of the value of JOB.) */
  176. /* If INFO .GT. 0 and COMPZ = 'N', then Z is not */
  177. /* accessed. */
  178. /* ================================================================ */
  179. /* Default values supplied by */
  180. /* ILAENV(ISPEC,'DHSEQR',JOB(:1)//COMPZ(:1),N,ILO,IHI,LWORK). */
  181. /* It is suggested that these defaults be adjusted in order */
  182. /* to attain best performance in each particular */
  183. /* computational environment. */
  184. /* ISPEC=12: The DLAHQR vs DLAQR0 crossover point. */
  185. /* Default: 75. (Must be at least 11.) */
  186. /* ISPEC=13: Recommended deflation window size. */
  187. /* This depends on ILO, IHI and NS. NS is the */
  188. /* number of simultaneous shifts returned */
  189. /* by ILAENV(ISPEC=15). (See ISPEC=15 below.) */
  190. /* The default for (IHI-ILO+1).LE.500 is NS. */
  191. /* The default for (IHI-ILO+1).GT.500 is 3*NS/2. */
  192. /* ISPEC=14: Nibble crossover point. (See IPARMQ for */
  193. /* details.) Default: 14% of deflation window */
  194. /* size. */
  195. /* ISPEC=15: Number of simultaneous shifts in a multishift */
  196. /* QR iteration. */
  197. /* If IHI-ILO+1 is ... */
  198. /* greater than ...but less ... the */
  199. /* or equal to ... than default is */
  200. /* 1 30 NS = 2(+) */
  201. /* 30 60 NS = 4(+) */
  202. /* 60 150 NS = 10(+) */
  203. /* 150 590 NS = ** */
  204. /* 590 3000 NS = 64 */
  205. /* 3000 6000 NS = 128 */
  206. /* 6000 infinity NS = 256 */
  207. /* (+) By default some or all matrices of this order */
  208. /* are passed to the implicit double shift routine */
  209. /* DLAHQR and this parameter is ignored. See */
  210. /* ISPEC=12 above and comments in IPARMQ for */
  211. /* details. */
  212. /* (**) The asterisks (**) indicate an ad-hoc */
  213. /* function of N increasing from 10 to 64. */
  214. /* ISPEC=16: Select structured matrix multiply. */
  215. /* If the number of simultaneous shifts (specified */
  216. /* by ISPEC=15) is less than 14, then the default */
  217. /* for ISPEC=16 is 0. Otherwise the default for */
  218. /* ISPEC=16 is 2. */
  219. /* ================================================================ */
  220. /* Based on contributions by */
  221. /* Karen Braman and Ralph Byers, Department of Mathematics, */
  222. /* University of Kansas, USA */
  223. /* ================================================================ */
  224. /* References: */
  225. /* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR */
  226. /* Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 */
  227. /* Performance, SIAM Journal of Matrix Analysis, volume 23, pages */
  228. /* 929--947, 2002. */
  229. /* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR */
  230. /* Algorithm Part II: Aggressive Early Deflation, SIAM Journal */
  231. /* of Matrix Analysis, volume 23, pages 948--973, 2002. */
  232. /* ================================================================ */
  233. /* .. Parameters .. */
  234. /* ==== Matrices of order NTINY or smaller must be processed by */
  235. /* . DLAHQR because of insufficient subdiagonal scratch space. */
  236. /* . (This is a hard limit.) ==== */
  237. /* ==== NL allocates some local workspace to help small matrices */
  238. /* . through a rare DLAHQR failure. NL .GT. NTINY = 11 is */
  239. /* . required and NL .LE. NMIN = ILAENV(ISPEC=12,...) is recom- */
  240. /* . mended. (The default value of NMIN is 75.) Using NL = 49 */
  241. /* . allows up to six simultaneous shifts and a 16-by-16 */
  242. /* . deflation window. ==== */
  243. /* .. */
  244. /* .. Local Arrays .. */
  245. /* .. */
  246. /* .. Local Scalars .. */
  247. /* .. */
  248. /* .. External Functions .. */
  249. /* .. */
  250. /* .. External Subroutines .. */
  251. /* .. */
  252. /* .. Intrinsic Functions .. */
  253. /* .. */
  254. /* .. Executable Statements .. */
  255. /* ==== Decode and check the input parameters. ==== */
  256. /* Parameter adjustments */
  257. h_dim1 = *ldh;
  258. h_offset = 1 + h_dim1;
  259. h__ -= h_offset;
  260. --wr;
  261. --wi;
  262. z_dim1 = *ldz;
  263. z_offset = 1 + z_dim1;
  264. z__ -= z_offset;
  265. --work;
  266. /* Function Body */
  267. wantt = _starpu_lsame_(job, "S");
  268. initz = _starpu_lsame_(compz, "I");
  269. wantz = initz || _starpu_lsame_(compz, "V");
  270. work[1] = (doublereal) max(1,*n);
  271. lquery = *lwork == -1;
  272. *info = 0;
  273. if (! _starpu_lsame_(job, "E") && ! wantt) {
  274. *info = -1;
  275. } else if (! _starpu_lsame_(compz, "N") && ! wantz) {
  276. *info = -2;
  277. } else if (*n < 0) {
  278. *info = -3;
  279. } else if (*ilo < 1 || *ilo > max(1,*n)) {
  280. *info = -4;
  281. } else if (*ihi < min(*ilo,*n) || *ihi > *n) {
  282. *info = -5;
  283. } else if (*ldh < max(1,*n)) {
  284. *info = -7;
  285. } else if (*ldz < 1 || wantz && *ldz < max(1,*n)) {
  286. *info = -11;
  287. } else if (*lwork < max(1,*n) && ! lquery) {
  288. *info = -13;
  289. }
  290. if (*info != 0) {
  291. /* ==== Quick return in case of invalid argument. ==== */
  292. i__1 = -(*info);
  293. _starpu_xerbla_("DHSEQR", &i__1);
  294. return 0;
  295. } else if (*n == 0) {
  296. /* ==== Quick return in case N = 0; nothing to do. ==== */
  297. return 0;
  298. } else if (lquery) {
  299. /* ==== Quick return in case of a workspace query ==== */
  300. _starpu_dlaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[
  301. 1], ilo, ihi, &z__[z_offset], ldz, &work[1], lwork, info);
  302. /* ==== Ensure reported workspace size is backward-compatible with */
  303. /* . previous LAPACK versions. ==== */
  304. /* Computing MAX */
  305. d__1 = (doublereal) max(1,*n);
  306. work[1] = max(d__1,work[1]);
  307. return 0;
  308. } else {
  309. /* ==== copy eigenvalues isolated by DGEBAL ==== */
  310. i__1 = *ilo - 1;
  311. for (i__ = 1; i__ <= i__1; ++i__) {
  312. wr[i__] = h__[i__ + i__ * h_dim1];
  313. wi[i__] = 0.;
  314. /* L10: */
  315. }
  316. i__1 = *n;
  317. for (i__ = *ihi + 1; i__ <= i__1; ++i__) {
  318. wr[i__] = h__[i__ + i__ * h_dim1];
  319. wi[i__] = 0.;
  320. /* L20: */
  321. }
  322. /* ==== Initialize Z, if requested ==== */
  323. if (initz) {
  324. _starpu_dlaset_("A", n, n, &c_b11, &c_b12, &z__[z_offset], ldz)
  325. ;
  326. }
  327. /* ==== Quick return if possible ==== */
  328. if (*ilo == *ihi) {
  329. wr[*ilo] = h__[*ilo + *ilo * h_dim1];
  330. wi[*ilo] = 0.;
  331. return 0;
  332. }
  333. /* ==== DLAHQR/DLAQR0 crossover point ==== */
  334. /* Writing concatenation */
  335. i__2[0] = 1, a__1[0] = job;
  336. i__2[1] = 1, a__1[1] = compz;
  337. s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2);
  338. nmin = _starpu_ilaenv_(&c__12, "DHSEQR", ch__1, n, ilo, ihi, lwork);
  339. nmin = max(11,nmin);
  340. /* ==== DLAQR0 for big matrices; DLAHQR for small ones ==== */
  341. if (*n > nmin) {
  342. _starpu_dlaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1],
  343. &wi[1], ilo, ihi, &z__[z_offset], ldz, &work[1], lwork,
  344. info);
  345. } else {
  346. /* ==== Small matrix ==== */
  347. _starpu_dlahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1],
  348. &wi[1], ilo, ihi, &z__[z_offset], ldz, info);
  349. if (*info > 0) {
  350. /* ==== A rare DLAHQR failure! DLAQR0 sometimes succeeds */
  351. /* . when DLAHQR fails. ==== */
  352. kbot = *info;
  353. if (*n >= 49) {
  354. /* ==== Larger matrices have enough subdiagonal scratch */
  355. /* . space to call DLAQR0 directly. ==== */
  356. _starpu_dlaqr0_(&wantt, &wantz, n, ilo, &kbot, &h__[h_offset],
  357. ldh, &wr[1], &wi[1], ilo, ihi, &z__[z_offset],
  358. ldz, &work[1], lwork, info);
  359. } else {
  360. /* ==== Tiny matrices don't have enough subdiagonal */
  361. /* . scratch space to benefit from DLAQR0. Hence, */
  362. /* . tiny matrices must be copied into a larger */
  363. /* . array before calling DLAQR0. ==== */
  364. _starpu_dlacpy_("A", n, n, &h__[h_offset], ldh, hl, &c__49);
  365. hl[*n + 1 + *n * 49 - 50] = 0.;
  366. i__1 = 49 - *n;
  367. _starpu_dlaset_("A", &c__49, &i__1, &c_b11, &c_b11, &hl[(*n + 1) *
  368. 49 - 49], &c__49);
  369. _starpu_dlaqr0_(&wantt, &wantz, &c__49, ilo, &kbot, hl, &c__49, &
  370. wr[1], &wi[1], ilo, ihi, &z__[z_offset], ldz,
  371. workl, &c__49, info);
  372. if (wantt || *info != 0) {
  373. _starpu_dlacpy_("A", n, n, hl, &c__49, &h__[h_offset], ldh);
  374. }
  375. }
  376. }
  377. }
  378. /* ==== Clear out the trash, if necessary. ==== */
  379. if ((wantt || *info != 0) && *n > 2) {
  380. i__1 = *n - 2;
  381. i__3 = *n - 2;
  382. _starpu_dlaset_("L", &i__1, &i__3, &c_b11, &c_b11, &h__[h_dim1 + 3], ldh);
  383. }
  384. /* ==== Ensure reported workspace size is backward-compatible with */
  385. /* . previous LAPACK versions. ==== */
  386. /* Computing MAX */
  387. d__1 = (doublereal) max(1,*n);
  388. work[1] = max(d__1,work[1]);
  389. }
  390. /* ==== End of DHSEQR ==== */
  391. return 0;
  392. } /* _starpu_dhseqr_ */