123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442 |
- /* dlar1v.f -- translated by f2c (version 20061008).
- You must link the resulting object file with libf2c:
- on Microsoft Windows system, link with libf2c.lib;
- on Linux or Unix systems, link with .../path/to/libf2c.a -lm
- or, if you install libf2c.a in a standard place, with -lf2c -lm
- -- in that order, at the end of the command line, as in
- cc *.o -lf2c -lm
- Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
- http://www.netlib.org/f2c/libf2c.zip
- */
- #include "f2c.h"
- #include "blaswrap.h"
- /* Subroutine */ int _starpu_dlar1v_(integer *n, integer *b1, integer *bn, doublereal
- *lambda, doublereal *d__, doublereal *l, doublereal *ld, doublereal *
- lld, doublereal *pivmin, doublereal *gaptol, doublereal *z__, logical
- *wantnc, integer *negcnt, doublereal *ztz, doublereal *mingma,
- integer *r__, integer *isuppz, doublereal *nrminv, doublereal *resid,
- doublereal *rqcorr, doublereal *work)
- {
- /* System generated locals */
- integer i__1;
- doublereal d__1, d__2, d__3;
- /* Builtin functions */
- double sqrt(doublereal);
- /* Local variables */
- integer i__;
- doublereal s;
- integer r1, r2;
- doublereal eps, tmp;
- integer neg1, neg2, indp, inds;
- doublereal dplus;
- extern doublereal _starpu_dlamch_(char *);
- extern logical _starpu_disnan_(doublereal *);
- integer indlpl, indumn;
- doublereal dminus;
- logical sawnan1, sawnan2;
- /* -- LAPACK auxiliary routine (version 3.2) -- */
- /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
- /* November 2006 */
- /* .. Scalar Arguments .. */
- /* .. */
- /* .. Array Arguments .. */
- /* .. */
- /* Purpose */
- /* ======= */
- /* DLAR1V computes the (scaled) r-th column of the inverse of */
- /* the sumbmatrix in rows B1 through BN of the tridiagonal matrix */
- /* L D L^T - sigma I. When sigma is close to an eigenvalue, the */
- /* computed vector is an accurate eigenvector. Usually, r corresponds */
- /* to the index where the eigenvector is largest in magnitude. */
- /* The following steps accomplish this computation : */
- /* (a) Stationary qd transform, L D L^T - sigma I = L(+) D(+) L(+)^T, */
- /* (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T, */
- /* (c) Computation of the diagonal elements of the inverse of */
- /* L D L^T - sigma I by combining the above transforms, and choosing */
- /* r as the index where the diagonal of the inverse is (one of the) */
- /* largest in magnitude. */
- /* (d) Computation of the (scaled) r-th column of the inverse using the */
- /* twisted factorization obtained by combining the top part of the */
- /* the stationary and the bottom part of the progressive transform. */
- /* Arguments */
- /* ========= */
- /* N (input) INTEGER */
- /* The order of the matrix L D L^T. */
- /* B1 (input) INTEGER */
- /* First index of the submatrix of L D L^T. */
- /* BN (input) INTEGER */
- /* Last index of the submatrix of L D L^T. */
- /* LAMBDA (input) DOUBLE PRECISION */
- /* The shift. In order to compute an accurate eigenvector, */
- /* LAMBDA should be a good approximation to an eigenvalue */
- /* of L D L^T. */
- /* L (input) DOUBLE PRECISION array, dimension (N-1) */
- /* The (n-1) subdiagonal elements of the unit bidiagonal matrix */
- /* L, in elements 1 to N-1. */
- /* D (input) DOUBLE PRECISION array, dimension (N) */
- /* The n diagonal elements of the diagonal matrix D. */
- /* LD (input) DOUBLE PRECISION array, dimension (N-1) */
- /* The n-1 elements L(i)*D(i). */
- /* LLD (input) DOUBLE PRECISION array, dimension (N-1) */
- /* The n-1 elements L(i)*L(i)*D(i). */
- /* PIVMIN (input) DOUBLE PRECISION */
- /* The minimum pivot in the Sturm sequence. */
- /* GAPTOL (input) DOUBLE PRECISION */
- /* Tolerance that indicates when eigenvector entries are negligible */
- /* w.r.t. their contribution to the residual. */
- /* Z (input/output) DOUBLE PRECISION array, dimension (N) */
- /* On input, all entries of Z must be set to 0. */
- /* On output, Z contains the (scaled) r-th column of the */
- /* inverse. The scaling is such that Z(R) equals 1. */
- /* WANTNC (input) LOGICAL */
- /* Specifies whether NEGCNT has to be computed. */
- /* NEGCNT (output) INTEGER */
- /* If WANTNC is .TRUE. then NEGCNT = the number of pivots < pivmin */
- /* in the matrix factorization L D L^T, and NEGCNT = -1 otherwise. */
- /* ZTZ (output) DOUBLE PRECISION */
- /* The square of the 2-norm of Z. */
- /* MINGMA (output) DOUBLE PRECISION */
- /* The reciprocal of the largest (in magnitude) diagonal */
- /* element of the inverse of L D L^T - sigma I. */
- /* R (input/output) INTEGER */
- /* The twist index for the twisted factorization used to */
- /* compute Z. */
- /* On input, 0 <= R <= N. If R is input as 0, R is set to */
- /* the index where (L D L^T - sigma I)^{-1} is largest */
- /* in magnitude. If 1 <= R <= N, R is unchanged. */
- /* On output, R contains the twist index used to compute Z. */
- /* Ideally, R designates the position of the maximum entry in the */
- /* eigenvector. */
- /* ISUPPZ (output) INTEGER array, dimension (2) */
- /* The support of the vector in Z, i.e., the vector Z is */
- /* nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ). */
- /* NRMINV (output) DOUBLE PRECISION */
- /* NRMINV = 1/SQRT( ZTZ ) */
- /* RESID (output) DOUBLE PRECISION */
- /* The residual of the FP vector. */
- /* RESID = ABS( MINGMA )/SQRT( ZTZ ) */
- /* RQCORR (output) DOUBLE PRECISION */
- /* The Rayleigh Quotient correction to LAMBDA. */
- /* RQCORR = MINGMA*TMP */
- /* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */
- /* Further Details */
- /* =============== */
- /* Based on contributions by */
- /* Beresford Parlett, University of California, Berkeley, USA */
- /* Jim Demmel, University of California, Berkeley, USA */
- /* Inderjit Dhillon, University of Texas, Austin, USA */
- /* Osni Marques, LBNL/NERSC, USA */
- /* Christof Voemel, University of California, Berkeley, USA */
- /* ===================================================================== */
- /* .. Parameters .. */
- /* .. */
- /* .. Local Scalars .. */
- /* .. */
- /* .. External Functions .. */
- /* .. */
- /* .. Intrinsic Functions .. */
- /* .. */
- /* .. Executable Statements .. */
- /* Parameter adjustments */
- --work;
- --isuppz;
- --z__;
- --lld;
- --ld;
- --l;
- --d__;
- /* Function Body */
- eps = _starpu_dlamch_("Precision");
- if (*r__ == 0) {
- r1 = *b1;
- r2 = *bn;
- } else {
- r1 = *r__;
- r2 = *r__;
- }
- /* Storage for LPLUS */
- indlpl = 0;
- /* Storage for UMINUS */
- indumn = *n;
- inds = (*n << 1) + 1;
- indp = *n * 3 + 1;
- if (*b1 == 1) {
- work[inds] = 0.;
- } else {
- work[inds + *b1 - 1] = lld[*b1 - 1];
- }
- /* Compute the stationary transform (using the differential form) */
- /* until the index R2. */
- sawnan1 = FALSE_;
- neg1 = 0;
- s = work[inds + *b1 - 1] - *lambda;
- i__1 = r1 - 1;
- for (i__ = *b1; i__ <= i__1; ++i__) {
- dplus = d__[i__] + s;
- work[indlpl + i__] = ld[i__] / dplus;
- if (dplus < 0.) {
- ++neg1;
- }
- work[inds + i__] = s * work[indlpl + i__] * l[i__];
- s = work[inds + i__] - *lambda;
- /* L50: */
- }
- sawnan1 = _starpu_disnan_(&s);
- if (sawnan1) {
- goto L60;
- }
- i__1 = r2 - 1;
- for (i__ = r1; i__ <= i__1; ++i__) {
- dplus = d__[i__] + s;
- work[indlpl + i__] = ld[i__] / dplus;
- work[inds + i__] = s * work[indlpl + i__] * l[i__];
- s = work[inds + i__] - *lambda;
- /* L51: */
- }
- sawnan1 = _starpu_disnan_(&s);
- L60:
- if (sawnan1) {
- /* Runs a slower version of the above loop if a NaN is detected */
- neg1 = 0;
- s = work[inds + *b1 - 1] - *lambda;
- i__1 = r1 - 1;
- for (i__ = *b1; i__ <= i__1; ++i__) {
- dplus = d__[i__] + s;
- if (abs(dplus) < *pivmin) {
- dplus = -(*pivmin);
- }
- work[indlpl + i__] = ld[i__] / dplus;
- if (dplus < 0.) {
- ++neg1;
- }
- work[inds + i__] = s * work[indlpl + i__] * l[i__];
- if (work[indlpl + i__] == 0.) {
- work[inds + i__] = lld[i__];
- }
- s = work[inds + i__] - *lambda;
- /* L70: */
- }
- i__1 = r2 - 1;
- for (i__ = r1; i__ <= i__1; ++i__) {
- dplus = d__[i__] + s;
- if (abs(dplus) < *pivmin) {
- dplus = -(*pivmin);
- }
- work[indlpl + i__] = ld[i__] / dplus;
- work[inds + i__] = s * work[indlpl + i__] * l[i__];
- if (work[indlpl + i__] == 0.) {
- work[inds + i__] = lld[i__];
- }
- s = work[inds + i__] - *lambda;
- /* L71: */
- }
- }
- /* Compute the progressive transform (using the differential form) */
- /* until the index R1 */
- sawnan2 = FALSE_;
- neg2 = 0;
- work[indp + *bn - 1] = d__[*bn] - *lambda;
- i__1 = r1;
- for (i__ = *bn - 1; i__ >= i__1; --i__) {
- dminus = lld[i__] + work[indp + i__];
- tmp = d__[i__] / dminus;
- if (dminus < 0.) {
- ++neg2;
- }
- work[indumn + i__] = l[i__] * tmp;
- work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda;
- /* L80: */
- }
- tmp = work[indp + r1 - 1];
- sawnan2 = _starpu_disnan_(&tmp);
- if (sawnan2) {
- /* Runs a slower version of the above loop if a NaN is detected */
- neg2 = 0;
- i__1 = r1;
- for (i__ = *bn - 1; i__ >= i__1; --i__) {
- dminus = lld[i__] + work[indp + i__];
- if (abs(dminus) < *pivmin) {
- dminus = -(*pivmin);
- }
- tmp = d__[i__] / dminus;
- if (dminus < 0.) {
- ++neg2;
- }
- work[indumn + i__] = l[i__] * tmp;
- work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda;
- if (tmp == 0.) {
- work[indp + i__ - 1] = d__[i__] - *lambda;
- }
- /* L100: */
- }
- }
- /* Find the index (from R1 to R2) of the largest (in magnitude) */
- /* diagonal element of the inverse */
- *mingma = work[inds + r1 - 1] + work[indp + r1 - 1];
- if (*mingma < 0.) {
- ++neg1;
- }
- if (*wantnc) {
- *negcnt = neg1 + neg2;
- } else {
- *negcnt = -1;
- }
- if (abs(*mingma) == 0.) {
- *mingma = eps * work[inds + r1 - 1];
- }
- *r__ = r1;
- i__1 = r2 - 1;
- for (i__ = r1; i__ <= i__1; ++i__) {
- tmp = work[inds + i__] + work[indp + i__];
- if (tmp == 0.) {
- tmp = eps * work[inds + i__];
- }
- if (abs(tmp) <= abs(*mingma)) {
- *mingma = tmp;
- *r__ = i__ + 1;
- }
- /* L110: */
- }
- /* Compute the FP vector: solve N^T v = e_r */
- isuppz[1] = *b1;
- isuppz[2] = *bn;
- z__[*r__] = 1.;
- *ztz = 1.;
- /* Compute the FP vector upwards from R */
- if (! sawnan1 && ! sawnan2) {
- i__1 = *b1;
- for (i__ = *r__ - 1; i__ >= i__1; --i__) {
- z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]);
- if (((d__1 = z__[i__], abs(d__1)) + (d__2 = z__[i__ + 1], abs(
- d__2))) * (d__3 = ld[i__], abs(d__3)) < *gaptol) {
- z__[i__] = 0.;
- isuppz[1] = i__ + 1;
- goto L220;
- }
- *ztz += z__[i__] * z__[i__];
- /* L210: */
- }
- L220:
- ;
- } else {
- /* Run slower loop if NaN occurred. */
- i__1 = *b1;
- for (i__ = *r__ - 1; i__ >= i__1; --i__) {
- if (z__[i__ + 1] == 0.) {
- z__[i__] = -(ld[i__ + 1] / ld[i__]) * z__[i__ + 2];
- } else {
- z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]);
- }
- if (((d__1 = z__[i__], abs(d__1)) + (d__2 = z__[i__ + 1], abs(
- d__2))) * (d__3 = ld[i__], abs(d__3)) < *gaptol) {
- z__[i__] = 0.;
- isuppz[1] = i__ + 1;
- goto L240;
- }
- *ztz += z__[i__] * z__[i__];
- /* L230: */
- }
- L240:
- ;
- }
- /* Compute the FP vector downwards from R in blocks of size BLKSIZ */
- if (! sawnan1 && ! sawnan2) {
- i__1 = *bn - 1;
- for (i__ = *r__; i__ <= i__1; ++i__) {
- z__[i__ + 1] = -(work[indumn + i__] * z__[i__]);
- if (((d__1 = z__[i__], abs(d__1)) + (d__2 = z__[i__ + 1], abs(
- d__2))) * (d__3 = ld[i__], abs(d__3)) < *gaptol) {
- z__[i__ + 1] = 0.;
- isuppz[2] = i__;
- goto L260;
- }
- *ztz += z__[i__ + 1] * z__[i__ + 1];
- /* L250: */
- }
- L260:
- ;
- } else {
- /* Run slower loop if NaN occurred. */
- i__1 = *bn - 1;
- for (i__ = *r__; i__ <= i__1; ++i__) {
- if (z__[i__] == 0.) {
- z__[i__ + 1] = -(ld[i__ - 1] / ld[i__]) * z__[i__ - 1];
- } else {
- z__[i__ + 1] = -(work[indumn + i__] * z__[i__]);
- }
- if (((d__1 = z__[i__], abs(d__1)) + (d__2 = z__[i__ + 1], abs(
- d__2))) * (d__3 = ld[i__], abs(d__3)) < *gaptol) {
- z__[i__ + 1] = 0.;
- isuppz[2] = i__;
- goto L280;
- }
- *ztz += z__[i__ + 1] * z__[i__ + 1];
- /* L270: */
- }
- L280:
- ;
- }
- /* Compute quantities for convergence test */
- tmp = 1. / *ztz;
- *nrminv = sqrt(tmp);
- *resid = abs(*mingma) * *nrminv;
- *rqcorr = *mingma * tmp;
- return 0;
- /* End of DLAR1V */
- } /* _starpu_dlar1v_ */
|