123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603 |
- /* dlasq2.f -- translated by f2c (version 20061008).
- You must link the resulting object file with libf2c:
- on Microsoft Windows system, link with libf2c.lib;
- on Linux or Unix systems, link with .../path/to/libf2c.a -lm
- or, if you install libf2c.a in a standard place, with -lf2c -lm
- -- in that order, at the end of the command line, as in
- cc *.o -lf2c -lm
- Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
- http://www.netlib.org/f2c/libf2c.zip
- */
- #include "f2c.h"
- #include "blaswrap.h"
- /* Table of constant values */
- static integer c__1 = 1;
- static integer c__2 = 2;
- static integer c__10 = 10;
- static integer c__3 = 3;
- static integer c__4 = 4;
- static integer c__11 = 11;
- /* Subroutine */ int _starpu_dlasq2_(integer *n, doublereal *z__, integer *info)
- {
- /* System generated locals */
- integer i__1, i__2, i__3;
- doublereal d__1, d__2;
- /* Builtin functions */
- double sqrt(doublereal);
- /* Local variables */
- doublereal d__, e, g;
- integer k;
- doublereal s, t;
- integer i0, i4, n0;
- doublereal dn;
- integer pp;
- doublereal dn1, dn2, dee, eps, tau, tol;
- integer ipn4;
- doublereal tol2;
- logical ieee;
- integer nbig;
- doublereal dmin__, emin, emax;
- integer kmin, ndiv, iter;
- doublereal qmin, temp, qmax, zmax;
- integer splt;
- doublereal dmin1, dmin2;
- integer nfail;
- doublereal desig, trace, sigma;
- integer iinfo, ttype;
- extern /* Subroutine */ int _starpu_dlasq3_(integer *, integer *, doublereal *,
- integer *, doublereal *, doublereal *, doublereal *, doublereal *,
- integer *, integer *, integer *, logical *, integer *,
- doublereal *, doublereal *, doublereal *, doublereal *,
- doublereal *, doublereal *, doublereal *);
- extern doublereal _starpu_dlamch_(char *);
- doublereal deemin;
- integer iwhila, iwhilb;
- doublereal oldemn, safmin;
- extern /* Subroutine */ int _starpu_xerbla_(char *, integer *);
- extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *,
- integer *, integer *);
- extern /* Subroutine */ int _starpu_dlasrt_(char *, integer *, doublereal *,
- integer *);
- /* -- LAPACK routine (version 3.2) -- */
- /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */
- /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */
- /* -- Berkeley -- */
- /* -- November 2008 -- */
- /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
- /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
- /* .. Scalar Arguments .. */
- /* .. */
- /* .. Array Arguments .. */
- /* .. */
- /* Purpose */
- /* ======= */
- /* DLASQ2 computes all the eigenvalues of the symmetric positive */
- /* definite tridiagonal matrix associated with the qd array Z to high */
- /* relative accuracy are computed to high relative accuracy, in the */
- /* absence of denormalization, underflow and overflow. */
- /* To see the relation of Z to the tridiagonal matrix, let L be a */
- /* unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and */
- /* let U be an upper bidiagonal matrix with 1's above and diagonal */
- /* Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the */
- /* symmetric tridiagonal to which it is similar. */
- /* Note : DLASQ2 defines a logical variable, IEEE, which is true */
- /* on machines which follow ieee-754 floating-point standard in their */
- /* handling of infinities and NaNs, and false otherwise. This variable */
- /* is passed to DLASQ3. */
- /* Arguments */
- /* ========= */
- /* N (input) INTEGER */
- /* The number of rows and columns in the matrix. N >= 0. */
- /* Z (input/output) DOUBLE PRECISION array, dimension ( 4*N ) */
- /* On entry Z holds the qd array. On exit, entries 1 to N hold */
- /* the eigenvalues in decreasing order, Z( 2*N+1 ) holds the */
- /* trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If */
- /* N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) */
- /* holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of */
- /* shifts that failed. */
- /* INFO (output) INTEGER */
- /* = 0: successful exit */
- /* < 0: if the i-th argument is a scalar and had an illegal */
- /* value, then INFO = -i, if the i-th argument is an */
- /* array and the j-entry had an illegal value, then */
- /* INFO = -(i*100+j) */
- /* > 0: the algorithm failed */
- /* = 1, a split was marked by a positive value in E */
- /* = 2, current block of Z not diagonalized after 30*N */
- /* iterations (in inner while loop) */
- /* = 3, termination criterion of outer while loop not met */
- /* (program created more than N unreduced blocks) */
- /* Further Details */
- /* =============== */
- /* Local Variables: I0:N0 defines a current unreduced segment of Z. */
- /* The shifts are accumulated in SIGMA. Iteration count is in ITER. */
- /* Ping-pong is controlled by PP (alternates between 0 and 1). */
- /* ===================================================================== */
- /* .. Parameters .. */
- /* .. */
- /* .. Local Scalars .. */
- /* .. */
- /* .. External Subroutines .. */
- /* .. */
- /* .. External Functions .. */
- /* .. */
- /* .. Intrinsic Functions .. */
- /* .. */
- /* .. Executable Statements .. */
- /* Test the input arguments. */
- /* (in case DLASQ2 is not called by DLASQ1) */
- /* Parameter adjustments */
- --z__;
- /* Function Body */
- *info = 0;
- eps = _starpu_dlamch_("Precision");
- safmin = _starpu_dlamch_("Safe minimum");
- tol = eps * 100.;
- /* Computing 2nd power */
- d__1 = tol;
- tol2 = d__1 * d__1;
- if (*n < 0) {
- *info = -1;
- _starpu_xerbla_("DLASQ2", &c__1);
- return 0;
- } else if (*n == 0) {
- return 0;
- } else if (*n == 1) {
- /* 1-by-1 case. */
- if (z__[1] < 0.) {
- *info = -201;
- _starpu_xerbla_("DLASQ2", &c__2);
- }
- return 0;
- } else if (*n == 2) {
- /* 2-by-2 case. */
- if (z__[2] < 0. || z__[3] < 0.) {
- *info = -2;
- _starpu_xerbla_("DLASQ2", &c__2);
- return 0;
- } else if (z__[3] > z__[1]) {
- d__ = z__[3];
- z__[3] = z__[1];
- z__[1] = d__;
- }
- z__[5] = z__[1] + z__[2] + z__[3];
- if (z__[2] > z__[3] * tol2) {
- t = (z__[1] - z__[3] + z__[2]) * .5;
- s = z__[3] * (z__[2] / t);
- if (s <= t) {
- s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.) + 1.)));
- } else {
- s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s)));
- }
- t = z__[1] + (s + z__[2]);
- z__[3] *= z__[1] / t;
- z__[1] = t;
- }
- z__[2] = z__[3];
- z__[6] = z__[2] + z__[1];
- return 0;
- }
- /* Check for negative data and compute sums of q's and e's. */
- z__[*n * 2] = 0.;
- emin = z__[2];
- qmax = 0.;
- zmax = 0.;
- d__ = 0.;
- e = 0.;
- i__1 = *n - 1 << 1;
- for (k = 1; k <= i__1; k += 2) {
- if (z__[k] < 0.) {
- *info = -(k + 200);
- _starpu_xerbla_("DLASQ2", &c__2);
- return 0;
- } else if (z__[k + 1] < 0.) {
- *info = -(k + 201);
- _starpu_xerbla_("DLASQ2", &c__2);
- return 0;
- }
- d__ += z__[k];
- e += z__[k + 1];
- /* Computing MAX */
- d__1 = qmax, d__2 = z__[k];
- qmax = max(d__1,d__2);
- /* Computing MIN */
- d__1 = emin, d__2 = z__[k + 1];
- emin = min(d__1,d__2);
- /* Computing MAX */
- d__1 = max(qmax,zmax), d__2 = z__[k + 1];
- zmax = max(d__1,d__2);
- /* L10: */
- }
- if (z__[(*n << 1) - 1] < 0.) {
- *info = -((*n << 1) + 199);
- _starpu_xerbla_("DLASQ2", &c__2);
- return 0;
- }
- d__ += z__[(*n << 1) - 1];
- /* Computing MAX */
- d__1 = qmax, d__2 = z__[(*n << 1) - 1];
- qmax = max(d__1,d__2);
- zmax = max(qmax,zmax);
- /* Check for diagonality. */
- if (e == 0.) {
- i__1 = *n;
- for (k = 2; k <= i__1; ++k) {
- z__[k] = z__[(k << 1) - 1];
- /* L20: */
- }
- _starpu_dlasrt_("D", n, &z__[1], &iinfo);
- z__[(*n << 1) - 1] = d__;
- return 0;
- }
- trace = d__ + e;
- /* Check for zero data. */
- if (trace == 0.) {
- z__[(*n << 1) - 1] = 0.;
- return 0;
- }
- /* Check whether the machine is IEEE conformable. */
- ieee = _starpu_ilaenv_(&c__10, "DLASQ2", "N", &c__1, &c__2, &c__3, &c__4) == 1 && _starpu_ilaenv_(&c__11, "DLASQ2", "N", &c__1, &c__2,
- &c__3, &c__4) == 1;
- /* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). */
- for (k = *n << 1; k >= 2; k += -2) {
- z__[k * 2] = 0.;
- z__[(k << 1) - 1] = z__[k];
- z__[(k << 1) - 2] = 0.;
- z__[(k << 1) - 3] = z__[k - 1];
- /* L30: */
- }
- i0 = 1;
- n0 = *n;
- /* Reverse the qd-array, if warranted. */
- if (z__[(i0 << 2) - 3] * 1.5 < z__[(n0 << 2) - 3]) {
- ipn4 = i0 + n0 << 2;
- i__1 = i0 + n0 - 1 << 1;
- for (i4 = i0 << 2; i4 <= i__1; i4 += 4) {
- temp = z__[i4 - 3];
- z__[i4 - 3] = z__[ipn4 - i4 - 3];
- z__[ipn4 - i4 - 3] = temp;
- temp = z__[i4 - 1];
- z__[i4 - 1] = z__[ipn4 - i4 - 5];
- z__[ipn4 - i4 - 5] = temp;
- /* L40: */
- }
- }
- /* Initial split checking via dqd and Li's test. */
- pp = 0;
- for (k = 1; k <= 2; ++k) {
- d__ = z__[(n0 << 2) + pp - 3];
- i__1 = (i0 << 2) + pp;
- for (i4 = (n0 - 1 << 2) + pp; i4 >= i__1; i4 += -4) {
- if (z__[i4 - 1] <= tol2 * d__) {
- z__[i4 - 1] = -0.;
- d__ = z__[i4 - 3];
- } else {
- d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1]));
- }
- /* L50: */
- }
- /* dqd maps Z to ZZ plus Li's test. */
- emin = z__[(i0 << 2) + pp + 1];
- d__ = z__[(i0 << 2) + pp - 3];
- i__1 = (n0 - 1 << 2) + pp;
- for (i4 = (i0 << 2) + pp; i4 <= i__1; i4 += 4) {
- z__[i4 - (pp << 1) - 2] = d__ + z__[i4 - 1];
- if (z__[i4 - 1] <= tol2 * d__) {
- z__[i4 - 1] = -0.;
- z__[i4 - (pp << 1) - 2] = d__;
- z__[i4 - (pp << 1)] = 0.;
- d__ = z__[i4 + 1];
- } else if (safmin * z__[i4 + 1] < z__[i4 - (pp << 1) - 2] &&
- safmin * z__[i4 - (pp << 1) - 2] < z__[i4 + 1]) {
- temp = z__[i4 + 1] / z__[i4 - (pp << 1) - 2];
- z__[i4 - (pp << 1)] = z__[i4 - 1] * temp;
- d__ *= temp;
- } else {
- z__[i4 - (pp << 1)] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - (
- pp << 1) - 2]);
- d__ = z__[i4 + 1] * (d__ / z__[i4 - (pp << 1) - 2]);
- }
- /* Computing MIN */
- d__1 = emin, d__2 = z__[i4 - (pp << 1)];
- emin = min(d__1,d__2);
- /* L60: */
- }
- z__[(n0 << 2) - pp - 2] = d__;
- /* Now find qmax. */
- qmax = z__[(i0 << 2) - pp - 2];
- i__1 = (n0 << 2) - pp - 2;
- for (i4 = (i0 << 2) - pp + 2; i4 <= i__1; i4 += 4) {
- /* Computing MAX */
- d__1 = qmax, d__2 = z__[i4];
- qmax = max(d__1,d__2);
- /* L70: */
- }
- /* Prepare for the next iteration on K. */
- pp = 1 - pp;
- /* L80: */
- }
- /* Initialise variables to pass to DLASQ3. */
- ttype = 0;
- dmin1 = 0.;
- dmin2 = 0.;
- dn = 0.;
- dn1 = 0.;
- dn2 = 0.;
- g = 0.;
- tau = 0.;
- iter = 2;
- nfail = 0;
- ndiv = n0 - i0 << 1;
- i__1 = *n + 1;
- for (iwhila = 1; iwhila <= i__1; ++iwhila) {
- if (n0 < 1) {
- goto L170;
- }
- /* While array unfinished do */
- /* E(N0) holds the value of SIGMA when submatrix in I0:N0 */
- /* splits from the rest of the array, but is negated. */
- desig = 0.;
- if (n0 == *n) {
- sigma = 0.;
- } else {
- sigma = -z__[(n0 << 2) - 1];
- }
- if (sigma < 0.) {
- *info = 1;
- return 0;
- }
- /* Find last unreduced submatrix's top index I0, find QMAX and */
- /* EMIN. Find Gershgorin-type bound if Q's much greater than E's. */
- emax = 0.;
- if (n0 > i0) {
- emin = (d__1 = z__[(n0 << 2) - 5], abs(d__1));
- } else {
- emin = 0.;
- }
- qmin = z__[(n0 << 2) - 3];
- qmax = qmin;
- for (i4 = n0 << 2; i4 >= 8; i4 += -4) {
- if (z__[i4 - 5] <= 0.) {
- goto L100;
- }
- if (qmin >= emax * 4.) {
- /* Computing MIN */
- d__1 = qmin, d__2 = z__[i4 - 3];
- qmin = min(d__1,d__2);
- /* Computing MAX */
- d__1 = emax, d__2 = z__[i4 - 5];
- emax = max(d__1,d__2);
- }
- /* Computing MAX */
- d__1 = qmax, d__2 = z__[i4 - 7] + z__[i4 - 5];
- qmax = max(d__1,d__2);
- /* Computing MIN */
- d__1 = emin, d__2 = z__[i4 - 5];
- emin = min(d__1,d__2);
- /* L90: */
- }
- i4 = 4;
- L100:
- i0 = i4 / 4;
- pp = 0;
- if (n0 - i0 > 1) {
- dee = z__[(i0 << 2) - 3];
- deemin = dee;
- kmin = i0;
- i__2 = (n0 << 2) - 3;
- for (i4 = (i0 << 2) + 1; i4 <= i__2; i4 += 4) {
- dee = z__[i4] * (dee / (dee + z__[i4 - 2]));
- if (dee <= deemin) {
- deemin = dee;
- kmin = (i4 + 3) / 4;
- }
- /* L110: */
- }
- if (kmin - i0 << 1 < n0 - kmin && deemin <= z__[(n0 << 2) - 3] *
- .5) {
- ipn4 = i0 + n0 << 2;
- pp = 2;
- i__2 = i0 + n0 - 1 << 1;
- for (i4 = i0 << 2; i4 <= i__2; i4 += 4) {
- temp = z__[i4 - 3];
- z__[i4 - 3] = z__[ipn4 - i4 - 3];
- z__[ipn4 - i4 - 3] = temp;
- temp = z__[i4 - 2];
- z__[i4 - 2] = z__[ipn4 - i4 - 2];
- z__[ipn4 - i4 - 2] = temp;
- temp = z__[i4 - 1];
- z__[i4 - 1] = z__[ipn4 - i4 - 5];
- z__[ipn4 - i4 - 5] = temp;
- temp = z__[i4];
- z__[i4] = z__[ipn4 - i4 - 4];
- z__[ipn4 - i4 - 4] = temp;
- /* L120: */
- }
- }
- }
- /* Put -(initial shift) into DMIN. */
- /* Computing MAX */
- d__1 = 0., d__2 = qmin - sqrt(qmin) * 2. * sqrt(emax);
- dmin__ = -max(d__1,d__2);
- /* Now I0:N0 is unreduced. */
- /* PP = 0 for ping, PP = 1 for pong. */
- /* PP = 2 indicates that flipping was applied to the Z array and */
- /* and that the tests for deflation upon entry in DLASQ3 */
- /* should not be performed. */
- nbig = (n0 - i0 + 1) * 30;
- i__2 = nbig;
- for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) {
- if (i0 > n0) {
- goto L150;
- }
- /* While submatrix unfinished take a good dqds step. */
- _starpu_dlasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, &
- nfail, &iter, &ndiv, &ieee, &ttype, &dmin1, &dmin2, &dn, &
- dn1, &dn2, &g, &tau);
- pp = 1 - pp;
- /* When EMIN is very small check for splits. */
- if (pp == 0 && n0 - i0 >= 3) {
- if (z__[n0 * 4] <= tol2 * qmax || z__[(n0 << 2) - 1] <= tol2 *
- sigma) {
- splt = i0 - 1;
- qmax = z__[(i0 << 2) - 3];
- emin = z__[(i0 << 2) - 1];
- oldemn = z__[i0 * 4];
- i__3 = n0 - 3 << 2;
- for (i4 = i0 << 2; i4 <= i__3; i4 += 4) {
- if (z__[i4] <= tol2 * z__[i4 - 3] || z__[i4 - 1] <=
- tol2 * sigma) {
- z__[i4 - 1] = -sigma;
- splt = i4 / 4;
- qmax = 0.;
- emin = z__[i4 + 3];
- oldemn = z__[i4 + 4];
- } else {
- /* Computing MAX */
- d__1 = qmax, d__2 = z__[i4 + 1];
- qmax = max(d__1,d__2);
- /* Computing MIN */
- d__1 = emin, d__2 = z__[i4 - 1];
- emin = min(d__1,d__2);
- /* Computing MIN */
- d__1 = oldemn, d__2 = z__[i4];
- oldemn = min(d__1,d__2);
- }
- /* L130: */
- }
- z__[(n0 << 2) - 1] = emin;
- z__[n0 * 4] = oldemn;
- i0 = splt + 1;
- }
- }
- /* L140: */
- }
- *info = 2;
- return 0;
- /* end IWHILB */
- L150:
- /* L160: */
- ;
- }
- *info = 3;
- return 0;
- /* end IWHILA */
- L170:
- /* Move q's to the front. */
- i__1 = *n;
- for (k = 2; k <= i__1; ++k) {
- z__[k] = z__[(k << 2) - 3];
- /* L180: */
- }
- /* Sort and compute sum of eigenvalues. */
- _starpu_dlasrt_("D", n, &z__[1], &iinfo);
- e = 0.;
- for (k = *n; k >= 1; --k) {
- e += z__[k];
- /* L190: */
- }
- /* Store trace, sum(eigenvalues) and information on performance. */
- z__[(*n << 1) + 1] = trace;
- z__[(*n << 1) + 2] = e;
- z__[(*n << 1) + 3] = (doublereal) iter;
- /* Computing 2nd power */
- i__1 = *n;
- z__[(*n << 1) + 4] = (doublereal) ndiv / (doublereal) (i__1 * i__1);
- z__[(*n << 1) + 5] = nfail * 100. / (doublereal) iter;
- return 0;
- /* End of DLASQ2 */
- } /* _starpu_dlasq2_ */
|