  /* dlasq2.c */
  /* dlasq2.f -- translated by f2c (version 20061008).
     You must link the resulting object file with libf2c:
         on Microsoft Windows system, link with libf2c.lib;
         on Linux or Unix systems, link with .../path/to/libf2c.a -lm
         or, if you install libf2c.a in a standard place, with -lf2c -lm
         -- in that order, at the end of the command line, as in
                 cc *.o -lf2c -lm
         Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,

                 http://www.netlib.org/f2c/libf2c.zip
  */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Table of constant values */
  14. static integer c__1 = 1;
  15. static integer c__2 = 2;
  16. static integer c__10 = 10;
  17. static integer c__3 = 3;
  18. static integer c__4 = 4;
  19. static integer c__11 = 11;
  20. /* Subroutine */ int _starpu_dlasq2_(integer *n, doublereal *z__, integer *info)
  21. {
  22. /* System generated locals */
  23. integer i__1, i__2, i__3;
  24. doublereal d__1, d__2;
  25. /* Builtin functions */
  26. double sqrt(doublereal);
  27. /* Local variables */
  28. doublereal d__, e, g;
  29. integer k;
  30. doublereal s, t;
  31. integer i0, i4, n0;
  32. doublereal dn;
  33. integer pp;
  34. doublereal dn1, dn2, dee, eps, tau, tol;
  35. integer ipn4;
  36. doublereal tol2;
  37. logical ieee;
  38. integer nbig;
  39. doublereal dmin__, emin, emax;
  40. integer kmin, ndiv, iter;
  41. doublereal qmin, temp, qmax, zmax;
  42. integer splt;
  43. doublereal dmin1, dmin2;
  44. integer nfail;
  45. doublereal desig, trace, sigma;
  46. integer iinfo, ttype;
  47. extern /* Subroutine */ int _starpu_dlasq3_(integer *, integer *, doublereal *,
  48. integer *, doublereal *, doublereal *, doublereal *, doublereal *,
  49. integer *, integer *, integer *, logical *, integer *,
  50. doublereal *, doublereal *, doublereal *, doublereal *,
  51. doublereal *, doublereal *, doublereal *);
  52. extern doublereal _starpu_dlamch_(char *);
  53. doublereal deemin;
  54. integer iwhila, iwhilb;
  55. doublereal oldemn, safmin;
  56. extern /* Subroutine */ int _starpu_xerbla_(char *, integer *);
  57. extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *,
  58. integer *, integer *);
  59. extern /* Subroutine */ int _starpu_dlasrt_(char *, integer *, doublereal *,
  60. integer *);
  61. /* -- LAPACK routine (version 3.2) -- */
  62. /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */
  63. /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */
  64. /* -- Berkeley -- */
  65. /* -- November 2008 -- */
  66. /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
  67. /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
  68. /* .. Scalar Arguments .. */
  69. /* .. */
  70. /* .. Array Arguments .. */
  71. /* .. */
  72. /* Purpose */
  73. /* ======= */
  74. /* DLASQ2 computes all the eigenvalues of the symmetric positive */
  75. /* definite tridiagonal matrix associated with the qd array Z to high */
  76. /* relative accuracy are computed to high relative accuracy, in the */
  77. /* absence of denormalization, underflow and overflow. */
  78. /* To see the relation of Z to the tridiagonal matrix, let L be a */
  79. /* unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and */
  80. /* let U be an upper bidiagonal matrix with 1's above and diagonal */
  81. /* Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the */
  82. /* symmetric tridiagonal to which it is similar. */
  83. /* Note : DLASQ2 defines a logical variable, IEEE, which is true */
  84. /* on machines which follow ieee-754 floating-point standard in their */
  85. /* handling of infinities and NaNs, and false otherwise. This variable */
  86. /* is passed to DLASQ3. */
  87. /* Arguments */
  88. /* ========= */
  89. /* N (input) INTEGER */
  90. /* The number of rows and columns in the matrix. N >= 0. */
  91. /* Z (input/output) DOUBLE PRECISION array, dimension ( 4*N ) */
  92. /* On entry Z holds the qd array. On exit, entries 1 to N hold */
  93. /* the eigenvalues in decreasing order, Z( 2*N+1 ) holds the */
  94. /* trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If */
  95. /* N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) */
  96. /* holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of */
  97. /* shifts that failed. */
  98. /* INFO (output) INTEGER */
  99. /* = 0: successful exit */
  100. /* < 0: if the i-th argument is a scalar and had an illegal */
  101. /* value, then INFO = -i, if the i-th argument is an */
  102. /* array and the j-entry had an illegal value, then */
  103. /* INFO = -(i*100+j) */
  104. /* > 0: the algorithm failed */
  105. /* = 1, a split was marked by a positive value in E */
  106. /* = 2, current block of Z not diagonalized after 30*N */
  107. /* iterations (in inner while loop) */
  108. /* = 3, termination criterion of outer while loop not met */
  109. /* (program created more than N unreduced blocks) */
  110. /* Further Details */
  111. /* =============== */
  112. /* Local Variables: I0:N0 defines a current unreduced segment of Z. */
  113. /* The shifts are accumulated in SIGMA. Iteration count is in ITER. */
  114. /* Ping-pong is controlled by PP (alternates between 0 and 1). */
  115. /* ===================================================================== */
  116. /* .. Parameters .. */
  117. /* .. */
  118. /* .. Local Scalars .. */
  119. /* .. */
  120. /* .. External Subroutines .. */
  121. /* .. */
  122. /* .. External Functions .. */
  123. /* .. */
  124. /* .. Intrinsic Functions .. */
  125. /* .. */
  126. /* .. Executable Statements .. */
  127. /* Test the input arguments. */
  128. /* (in case DLASQ2 is not called by DLASQ1) */
  129. /* Parameter adjustments */
  130. --z__;
  131. /* Function Body */
  132. *info = 0;
  133. eps = _starpu_dlamch_("Precision");
  134. safmin = _starpu_dlamch_("Safe minimum");
  135. tol = eps * 100.;
  136. /* Computing 2nd power */
  137. d__1 = tol;
  138. tol2 = d__1 * d__1;
  139. if (*n < 0) {
  140. *info = -1;
  141. _starpu_xerbla_("DLASQ2", &c__1);
  142. return 0;
  143. } else if (*n == 0) {
  144. return 0;
  145. } else if (*n == 1) {
  146. /* 1-by-1 case. */
  147. if (z__[1] < 0.) {
  148. *info = -201;
  149. _starpu_xerbla_("DLASQ2", &c__2);
  150. }
  151. return 0;
  152. } else if (*n == 2) {
  153. /* 2-by-2 case. */
  154. if (z__[2] < 0. || z__[3] < 0.) {
  155. *info = -2;
  156. _starpu_xerbla_("DLASQ2", &c__2);
  157. return 0;
  158. } else if (z__[3] > z__[1]) {
  159. d__ = z__[3];
  160. z__[3] = z__[1];
  161. z__[1] = d__;
  162. }
  163. z__[5] = z__[1] + z__[2] + z__[3];
  164. if (z__[2] > z__[3] * tol2) {
  165. t = (z__[1] - z__[3] + z__[2]) * .5;
  166. s = z__[3] * (z__[2] / t);
  167. if (s <= t) {
  168. s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.) + 1.)));
  169. } else {
  170. s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s)));
  171. }
  172. t = z__[1] + (s + z__[2]);
  173. z__[3] *= z__[1] / t;
  174. z__[1] = t;
  175. }
  176. z__[2] = z__[3];
  177. z__[6] = z__[2] + z__[1];
  178. return 0;
  179. }
  180. /* Check for negative data and compute sums of q's and e's. */
  181. z__[*n * 2] = 0.;
  182. emin = z__[2];
  183. qmax = 0.;
  184. zmax = 0.;
  185. d__ = 0.;
  186. e = 0.;
  187. i__1 = *n - 1 << 1;
  188. for (k = 1; k <= i__1; k += 2) {
  189. if (z__[k] < 0.) {
  190. *info = -(k + 200);
  191. _starpu_xerbla_("DLASQ2", &c__2);
  192. return 0;
  193. } else if (z__[k + 1] < 0.) {
  194. *info = -(k + 201);
  195. _starpu_xerbla_("DLASQ2", &c__2);
  196. return 0;
  197. }
  198. d__ += z__[k];
  199. e += z__[k + 1];
  200. /* Computing MAX */
  201. d__1 = qmax, d__2 = z__[k];
  202. qmax = max(d__1,d__2);
  203. /* Computing MIN */
  204. d__1 = emin, d__2 = z__[k + 1];
  205. emin = min(d__1,d__2);
  206. /* Computing MAX */
  207. d__1 = max(qmax,zmax), d__2 = z__[k + 1];
  208. zmax = max(d__1,d__2);
  209. /* L10: */
  210. }
  211. if (z__[(*n << 1) - 1] < 0.) {
  212. *info = -((*n << 1) + 199);
  213. _starpu_xerbla_("DLASQ2", &c__2);
  214. return 0;
  215. }
  216. d__ += z__[(*n << 1) - 1];
  217. /* Computing MAX */
  218. d__1 = qmax, d__2 = z__[(*n << 1) - 1];
  219. qmax = max(d__1,d__2);
  220. zmax = max(qmax,zmax);
  221. /* Check for diagonality. */
  222. if (e == 0.) {
  223. i__1 = *n;
  224. for (k = 2; k <= i__1; ++k) {
  225. z__[k] = z__[(k << 1) - 1];
  226. /* L20: */
  227. }
  228. _starpu_dlasrt_("D", n, &z__[1], &iinfo);
  229. z__[(*n << 1) - 1] = d__;
  230. return 0;
  231. }
  232. trace = d__ + e;
  233. /* Check for zero data. */
  234. if (trace == 0.) {
  235. z__[(*n << 1) - 1] = 0.;
  236. return 0;
  237. }
  238. /* Check whether the machine is IEEE conformable. */
  239. ieee = _starpu_ilaenv_(&c__10, "DLASQ2", "N", &c__1, &c__2, &c__3, &c__4) == 1 && _starpu_ilaenv_(&c__11, "DLASQ2", "N", &c__1, &c__2,
  240. &c__3, &c__4) == 1;
  241. /* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). */
  242. for (k = *n << 1; k >= 2; k += -2) {
  243. z__[k * 2] = 0.;
  244. z__[(k << 1) - 1] = z__[k];
  245. z__[(k << 1) - 2] = 0.;
  246. z__[(k << 1) - 3] = z__[k - 1];
  247. /* L30: */
  248. }
  249. i0 = 1;
  250. n0 = *n;
  251. /* Reverse the qd-array, if warranted. */
  252. if (z__[(i0 << 2) - 3] * 1.5 < z__[(n0 << 2) - 3]) {
  253. ipn4 = i0 + n0 << 2;
  254. i__1 = i0 + n0 - 1 << 1;
  255. for (i4 = i0 << 2; i4 <= i__1; i4 += 4) {
  256. temp = z__[i4 - 3];
  257. z__[i4 - 3] = z__[ipn4 - i4 - 3];
  258. z__[ipn4 - i4 - 3] = temp;
  259. temp = z__[i4 - 1];
  260. z__[i4 - 1] = z__[ipn4 - i4 - 5];
  261. z__[ipn4 - i4 - 5] = temp;
  262. /* L40: */
  263. }
  264. }
  265. /* Initial split checking via dqd and Li's test. */
  266. pp = 0;
  267. for (k = 1; k <= 2; ++k) {
  268. d__ = z__[(n0 << 2) + pp - 3];
  269. i__1 = (i0 << 2) + pp;
  270. for (i4 = (n0 - 1 << 2) + pp; i4 >= i__1; i4 += -4) {
  271. if (z__[i4 - 1] <= tol2 * d__) {
  272. z__[i4 - 1] = -0.;
  273. d__ = z__[i4 - 3];
  274. } else {
  275. d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1]));
  276. }
  277. /* L50: */
  278. }
  279. /* dqd maps Z to ZZ plus Li's test. */
  280. emin = z__[(i0 << 2) + pp + 1];
  281. d__ = z__[(i0 << 2) + pp - 3];
  282. i__1 = (n0 - 1 << 2) + pp;
  283. for (i4 = (i0 << 2) + pp; i4 <= i__1; i4 += 4) {
  284. z__[i4 - (pp << 1) - 2] = d__ + z__[i4 - 1];
  285. if (z__[i4 - 1] <= tol2 * d__) {
  286. z__[i4 - 1] = -0.;
  287. z__[i4 - (pp << 1) - 2] = d__;
  288. z__[i4 - (pp << 1)] = 0.;
  289. d__ = z__[i4 + 1];
  290. } else if (safmin * z__[i4 + 1] < z__[i4 - (pp << 1) - 2] &&
  291. safmin * z__[i4 - (pp << 1) - 2] < z__[i4 + 1]) {
  292. temp = z__[i4 + 1] / z__[i4 - (pp << 1) - 2];
  293. z__[i4 - (pp << 1)] = z__[i4 - 1] * temp;
  294. d__ *= temp;
  295. } else {
  296. z__[i4 - (pp << 1)] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - (
  297. pp << 1) - 2]);
  298. d__ = z__[i4 + 1] * (d__ / z__[i4 - (pp << 1) - 2]);
  299. }
  300. /* Computing MIN */
  301. d__1 = emin, d__2 = z__[i4 - (pp << 1)];
  302. emin = min(d__1,d__2);
  303. /* L60: */
  304. }
  305. z__[(n0 << 2) - pp - 2] = d__;
  306. /* Now find qmax. */
  307. qmax = z__[(i0 << 2) - pp - 2];
  308. i__1 = (n0 << 2) - pp - 2;
  309. for (i4 = (i0 << 2) - pp + 2; i4 <= i__1; i4 += 4) {
  310. /* Computing MAX */
  311. d__1 = qmax, d__2 = z__[i4];
  312. qmax = max(d__1,d__2);
  313. /* L70: */
  314. }
  315. /* Prepare for the next iteration on K. */
  316. pp = 1 - pp;
  317. /* L80: */
  318. }
  319. /* Initialise variables to pass to DLASQ3. */
  320. ttype = 0;
  321. dmin1 = 0.;
  322. dmin2 = 0.;
  323. dn = 0.;
  324. dn1 = 0.;
  325. dn2 = 0.;
  326. g = 0.;
  327. tau = 0.;
  328. iter = 2;
  329. nfail = 0;
  330. ndiv = n0 - i0 << 1;
  331. i__1 = *n + 1;
  332. for (iwhila = 1; iwhila <= i__1; ++iwhila) {
  333. if (n0 < 1) {
  334. goto L170;
  335. }
  336. /* While array unfinished do */
  337. /* E(N0) holds the value of SIGMA when submatrix in I0:N0 */
  338. /* splits from the rest of the array, but is negated. */
  339. desig = 0.;
  340. if (n0 == *n) {
  341. sigma = 0.;
  342. } else {
  343. sigma = -z__[(n0 << 2) - 1];
  344. }
  345. if (sigma < 0.) {
  346. *info = 1;
  347. return 0;
  348. }
  349. /* Find last unreduced submatrix's top index I0, find QMAX and */
  350. /* EMIN. Find Gershgorin-type bound if Q's much greater than E's. */
  351. emax = 0.;
  352. if (n0 > i0) {
  353. emin = (d__1 = z__[(n0 << 2) - 5], abs(d__1));
  354. } else {
  355. emin = 0.;
  356. }
  357. qmin = z__[(n0 << 2) - 3];
  358. qmax = qmin;
  359. for (i4 = n0 << 2; i4 >= 8; i4 += -4) {
  360. if (z__[i4 - 5] <= 0.) {
  361. goto L100;
  362. }
  363. if (qmin >= emax * 4.) {
  364. /* Computing MIN */
  365. d__1 = qmin, d__2 = z__[i4 - 3];
  366. qmin = min(d__1,d__2);
  367. /* Computing MAX */
  368. d__1 = emax, d__2 = z__[i4 - 5];
  369. emax = max(d__1,d__2);
  370. }
  371. /* Computing MAX */
  372. d__1 = qmax, d__2 = z__[i4 - 7] + z__[i4 - 5];
  373. qmax = max(d__1,d__2);
  374. /* Computing MIN */
  375. d__1 = emin, d__2 = z__[i4 - 5];
  376. emin = min(d__1,d__2);
  377. /* L90: */
  378. }
  379. i4 = 4;
  380. L100:
  381. i0 = i4 / 4;
  382. pp = 0;
  383. if (n0 - i0 > 1) {
  384. dee = z__[(i0 << 2) - 3];
  385. deemin = dee;
  386. kmin = i0;
  387. i__2 = (n0 << 2) - 3;
  388. for (i4 = (i0 << 2) + 1; i4 <= i__2; i4 += 4) {
  389. dee = z__[i4] * (dee / (dee + z__[i4 - 2]));
  390. if (dee <= deemin) {
  391. deemin = dee;
  392. kmin = (i4 + 3) / 4;
  393. }
  394. /* L110: */
  395. }
  396. if (kmin - i0 << 1 < n0 - kmin && deemin <= z__[(n0 << 2) - 3] *
  397. .5) {
  398. ipn4 = i0 + n0 << 2;
  399. pp = 2;
  400. i__2 = i0 + n0 - 1 << 1;
  401. for (i4 = i0 << 2; i4 <= i__2; i4 += 4) {
  402. temp = z__[i4 - 3];
  403. z__[i4 - 3] = z__[ipn4 - i4 - 3];
  404. z__[ipn4 - i4 - 3] = temp;
  405. temp = z__[i4 - 2];
  406. z__[i4 - 2] = z__[ipn4 - i4 - 2];
  407. z__[ipn4 - i4 - 2] = temp;
  408. temp = z__[i4 - 1];
  409. z__[i4 - 1] = z__[ipn4 - i4 - 5];
  410. z__[ipn4 - i4 - 5] = temp;
  411. temp = z__[i4];
  412. z__[i4] = z__[ipn4 - i4 - 4];
  413. z__[ipn4 - i4 - 4] = temp;
  414. /* L120: */
  415. }
  416. }
  417. }
  418. /* Put -(initial shift) into DMIN. */
  419. /* Computing MAX */
  420. d__1 = 0., d__2 = qmin - sqrt(qmin) * 2. * sqrt(emax);
  421. dmin__ = -max(d__1,d__2);
  422. /* Now I0:N0 is unreduced. */
  423. /* PP = 0 for ping, PP = 1 for pong. */
  424. /* PP = 2 indicates that flipping was applied to the Z array and */
  425. /* and that the tests for deflation upon entry in DLASQ3 */
  426. /* should not be performed. */
  427. nbig = (n0 - i0 + 1) * 30;
  428. i__2 = nbig;
  429. for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) {
  430. if (i0 > n0) {
  431. goto L150;
  432. }
  433. /* While submatrix unfinished take a good dqds step. */
  434. _starpu_dlasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, &
  435. nfail, &iter, &ndiv, &ieee, &ttype, &dmin1, &dmin2, &dn, &
  436. dn1, &dn2, &g, &tau);
  437. pp = 1 - pp;
  438. /* When EMIN is very small check for splits. */
  439. if (pp == 0 && n0 - i0 >= 3) {
  440. if (z__[n0 * 4] <= tol2 * qmax || z__[(n0 << 2) - 1] <= tol2 *
  441. sigma) {
  442. splt = i0 - 1;
  443. qmax = z__[(i0 << 2) - 3];
  444. emin = z__[(i0 << 2) - 1];
  445. oldemn = z__[i0 * 4];
  446. i__3 = n0 - 3 << 2;
  447. for (i4 = i0 << 2; i4 <= i__3; i4 += 4) {
  448. if (z__[i4] <= tol2 * z__[i4 - 3] || z__[i4 - 1] <=
  449. tol2 * sigma) {
  450. z__[i4 - 1] = -sigma;
  451. splt = i4 / 4;
  452. qmax = 0.;
  453. emin = z__[i4 + 3];
  454. oldemn = z__[i4 + 4];
  455. } else {
  456. /* Computing MAX */
  457. d__1 = qmax, d__2 = z__[i4 + 1];
  458. qmax = max(d__1,d__2);
  459. /* Computing MIN */
  460. d__1 = emin, d__2 = z__[i4 - 1];
  461. emin = min(d__1,d__2);
  462. /* Computing MIN */
  463. d__1 = oldemn, d__2 = z__[i4];
  464. oldemn = min(d__1,d__2);
  465. }
  466. /* L130: */
  467. }
  468. z__[(n0 << 2) - 1] = emin;
  469. z__[n0 * 4] = oldemn;
  470. i0 = splt + 1;
  471. }
  472. }
  473. /* L140: */
  474. }
  475. *info = 2;
  476. return 0;
  477. /* end IWHILB */
  478. L150:
  479. /* L160: */
  480. ;
  481. }
  482. *info = 3;
  483. return 0;
  484. /* end IWHILA */
  485. L170:
  486. /* Move q's to the front. */
  487. i__1 = *n;
  488. for (k = 2; k <= i__1; ++k) {
  489. z__[k] = z__[(k << 2) - 3];
  490. /* L180: */
  491. }
  492. /* Sort and compute sum of eigenvalues. */
  493. _starpu_dlasrt_("D", n, &z__[1], &iinfo);
  494. e = 0.;
  495. for (k = *n; k >= 1; --k) {
  496. e += z__[k];
  497. /* L190: */
  498. }
  499. /* Store trace, sum(eigenvalues) and information on performance. */
  500. z__[(*n << 1) + 1] = trace;
  501. z__[(*n << 1) + 2] = e;
  502. z__[(*n << 1) + 3] = (doublereal) iter;
  503. /* Computing 2nd power */
  504. i__1 = *n;
  505. z__[(*n << 1) + 4] = (doublereal) ndiv / (doublereal) (i__1 * i__1);
  506. z__[(*n << 1) + 5] = nfail * 100. / (doublereal) iter;
  507. return 0;
  508. /* End of DLASQ2 */
  509. } /* _starpu_dlasq2_ */