| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989 | /* dlarrv.f -- translated by f2c (version 20061008).   You must link the resulting object file with libf2c:	on Microsoft Windows system, link with libf2c.lib;	on Linux or Unix systems, link with .../path/to/libf2c.a -lm	or, if you install libf2c.a in a standard place, with -lf2c -lm	-- in that order, at the end of the command line, as in		cc *.o -lf2c -lm	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,		http://www.netlib.org/f2c/libf2c.zip*/#include "f2c.h"#include "blaswrap.h"/* Table of constant values */static doublereal c_b5 = 0.;static integer c__1 = 1;static integer c__2 = 2;/* Subroutine */ int _starpu_dlarrv_(integer *n, doublereal *vl, doublereal *vu, 	doublereal *d__, doublereal *l, doublereal *pivmin, integer *isplit, 	integer *m, integer *dol, integer *dou, doublereal *minrgp, 	doublereal *rtol1, doublereal *rtol2, doublereal *w, doublereal *werr, 	 doublereal *wgap, integer *iblock, integer *indexw, doublereal *gers, 	 doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, 	integer *iwork, integer *info){    /* System generated locals */    integer z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5;    doublereal d__1, d__2;    logical L__1;    /* Builtin functions */    double log(doublereal);    /* Local variables */    integer minwsize, i__, j, k, p, q, miniwsize, ii;    doublereal gl;    integer im, in;    doublereal gu, gap, eps, tau, tol, tmp;    integer zto;    doublereal ztz;    integer iend, jblk;    doublereal lgap;    integer done;    doublereal rgap, left;    integer wend, iter;    doublereal bstw;    integer itmp1;    extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, 	    integer *);    integer indld;    doublereal fudge;    integer idone;    doublereal sigma;    integer iinfo, iindr;    doublereal resid;    logical eskip;    doublereal right;    extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, 	    doublereal *, integer *);    integer nclus, zfrom;    doublereal rqtol;    integer iindc1, iindc2;    extern /* Subroutine */ int _starpu_dlar1v_(integer *, integer *, integer *, 	    doublereal *, doublereal *, doublereal *, doublereal *, 	    doublereal *, doublereal *, doublereal *, doublereal *, logical *, 	     integer *, doublereal *, doublereal *, integer *, integer *, 	    doublereal *, doublereal *, doublereal *, doublereal *);    logical stp2ii;    doublereal lambda;    extern doublereal _starpu_dlamch_(char *);    integer ibegin, indeig;    logical needbs;    integer indlld;    doublereal sgndef, mingma;    extern /* Subroutine */ int _starpu_dlarrb_(integer *, doublereal *, doublereal *, 	     integer *, integer *, doublereal *, doublereal *, integer *, 	    doublereal *, doublereal *, doublereal *, doublereal *, integer *, 	     doublereal *, doublereal *, integer *, integer *);    integer oldien, oldncl, wbegin;    doublereal spdiam;    integer negcnt;    extern /* Subroutine */ int _starpu_dlarrf_(integer *, doublereal *, doublereal *, 	     doublereal *, integer *, integer *, doublereal *, doublereal *, 	    doublereal *, doublereal *, doublereal *, doublereal *, 	    doublereal *, doublereal *, doublereal *, doublereal *, 	    doublereal *, integer *);    integer oldcls;    doublereal savgap;    integer ndepth;    doublereal ssigma;    extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, 	    doublereal *, doublereal *, doublereal *, integer *);    logical usedbs;    integer iindwk, offset;    doublereal gaptol;    integer newcls, oldfst, indwrk, windex, oldlst;    logical usedrq;    integer newfst, newftt, parity, windmn, windpl, isupmn, newlst, zusedl;    doublereal bstres;    integer newsiz, zusedu, zusedw;    doublereal nrminv, rqcorr;    logical tryrqc;    integer isupmx;/*  -- LAPACK auxiliary routine (version 3.2) -- *//*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. *//*     November 2006 *//*     .. Scalar Arguments .. *//*     .. *//*     .. Array Arguments .. *//*     .. *//*  Purpose *//*  ======= *//*  DLARRV computes the eigenvectors of the tridiagonal matrix *//*  T = L D L^T given L, D and APPROXIMATIONS to the eigenvalues of L D L^T. *//*  The input eigenvalues should have been computed by DLARRE. *//*  Arguments *//*  ========= *//*  N       (input) INTEGER *//*          The order of the matrix.  N >= 0. *//*  VL      (input) DOUBLE PRECISION *//*  VU      (input) DOUBLE PRECISION *//*          Lower and upper bounds of the interval that contains the desired *//*          eigenvalues. VL < VU. Needed to compute gaps on the left or right *//*          end of the extremal eigenvalues in the desired RANGE. *//*  D       (input/output) DOUBLE PRECISION array, dimension (N) *//*          On entry, the N diagonal elements of the diagonal matrix D. *//*          On exit, D may be overwritten. *//*  L       (input/output) DOUBLE PRECISION array, dimension (N) *//*          On entry, the (N-1) subdiagonal elements of the unit *//*          bidiagonal matrix L are in elements 1 to N-1 of L *//*          (if the matrix is not splitted.) At the end of each block *//*          is stored the corresponding shift as given by DLARRE. *//*          On exit, L is overwritten. *//*  PIVMIN  (in) DOUBLE PRECISION *//*          The minimum pivot allowed in the Sturm sequence. *//*  ISPLIT  (input) INTEGER array, dimension (N) *//*          The splitting points, at which T breaks up into blocks. *//*          The first block consists of rows/columns 1 to *//*          ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 *//*          through ISPLIT( 2 ), etc. *//*  M       (input) INTEGER *//*          The total number of input eigenvalues.  0 <= M <= N. *//*  DOL     (input) INTEGER *//*  DOU     (input) INTEGER *//*          If the user wants to compute only selected eigenvectors from all *//*          the eigenvalues supplied, he can specify an index range DOL:DOU. *//*          Or else the setting DOL=1, DOU=M should be applied. *//*          Note that DOL and DOU refer to the order in which the eigenvalues *//*          are stored in W. *//*          If the user wants to compute only selected eigenpairs, then *//*          the columns DOL-1 to DOU+1 of the eigenvector space Z contain the *//*          computed eigenvectors. All other columns of Z are set to zero. *//*  MINRGP  (input) DOUBLE PRECISION *//*  RTOL1   (input) DOUBLE PRECISION *//*  RTOL2   (input) DOUBLE PRECISION *//*           Parameters for bisection. *//*           An interval [LEFT,RIGHT] has converged if *//*           RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) *//*  W       (input/output) DOUBLE PRECISION array, dimension (N) *//*          The first M elements of W contain the APPROXIMATE eigenvalues for *//*          which eigenvectors are to be computed.  The eigenvalues *//*          should be grouped by split-off block and ordered from *//*          smallest to largest within the block ( The output array *//*          W from DLARRE is expected here ). Furthermore, they are with *//*          respect to the shift of the corresponding root representation *//*          for their block. On exit, W holds the eigenvalues of the *//*          UNshifted matrix. *//*  WERR    (input/output) DOUBLE PRECISION array, dimension (N) *//*          The first M elements contain the semiwidth of the uncertainty *//*          interval of the corresponding eigenvalue in W *//*  WGAP    (input/output) DOUBLE PRECISION array, dimension (N) *//*          The separation from the right neighbor eigenvalue in W. *//*  IBLOCK  (input) INTEGER array, dimension (N) *//*          The indices of the blocks (submatrices) associated with the *//*          corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue *//*          W(i) belongs to the first block from the top, =2 if W(i) *//*          belongs to the second block, etc. *//*  INDEXW  (input) INTEGER array, dimension (N) *//*          The indices of the eigenvalues within each block (submatrix); *//*          for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the *//*          i-th eigenvalue W(i) is the 10-th eigenvalue in the second block. *//*  GERS    (input) DOUBLE PRECISION array, dimension (2*N) *//*          The N Gerschgorin intervals (the i-th Gerschgorin interval *//*          is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should *//*          be computed from the original UNshifted matrix. *//*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) *//*          If INFO = 0, the first M columns of Z contain the *//*          orthonormal eigenvectors of the matrix T *//*          corresponding to the input eigenvalues, with the i-th *//*          column of Z holding the eigenvector associated with W(i). *//*          Note: the user must ensure that at least max(1,M) columns are *//*          supplied in the array Z. *//*  LDZ     (input) INTEGER *//*          The leading dimension of the array Z.  LDZ >= 1, and if *//*          JOBZ = 'V', LDZ >= max(1,N). *//*  ISUPPZ  (output) INTEGER array, dimension ( 2*max(1,M) ) *//*          The support of the eigenvectors in Z, i.e., the indices *//*          indicating the nonzero elements in Z. The I-th eigenvector *//*          is nonzero only in elements ISUPPZ( 2*I-1 ) through *//*          ISUPPZ( 2*I ). *//*  WORK    (workspace) DOUBLE PRECISION array, dimension (12*N) *//*  IWORK   (workspace) INTEGER array, dimension (7*N) *//*  INFO    (output) INTEGER *//*          = 0:  successful exit *//*          > 0:  A problem occured in DLARRV. *//*          < 0:  One of the called subroutines signaled an internal problem. *//*                Needs inspection of the corresponding parameter IINFO *//*                for further information. *//*          =-1:  Problem in DLARRB when refining a child's eigenvalues. *//*          =-2:  Problem in DLARRF when computing the RRR of a child. *//*                When a child is inside a tight cluster, it can be difficult *//*                to find an RRR. A partial remedy from the user's point of *//*                view is to make the parameter MINRGP smaller and recompile. *//*                However, as the orthogonality of the computed vectors is *//*                proportional to 1/MINRGP, the user should be aware that *//*                he might be trading in precision when he decreases MINRGP. *//*          =-3:  Problem in DLARRB when refining a single eigenvalue *//*                after the Rayleigh correction was rejected. *//*          = 5:  The Rayleigh Quotient Iteration failed to converge to *//*                full accuracy in MAXITR steps. *//*  Further Details *//*  =============== *//*  Based on contributions by *//*     Beresford Parlett, University of California, Berkeley, USA *//*     Jim Demmel, University of California, Berkeley, USA *//*     Inderjit Dhillon, University of Texas, Austin, USA *//*     Osni Marques, LBNL/NERSC, USA *//*     Christof Voemel, University of California, Berkeley, USA *//*  ===================================================================== *//*     .. Parameters .. *//*     .. *//*     .. Local Scalars .. *//*     .. *//*     .. External Functions .. *//*     .. *//*     .. External Subroutines .. *//*     .. *//*     .. Intrinsic Functions .. *//*     .. *//*     .. Executable Statements .. *//*     .. *//*     The first N entries of WORK are reserved for the eigenvalues */    /* Parameter adjustments */    --d__;    --l;    --isplit;    --w;    --werr;    --wgap;    --iblock;    --indexw;    --gers;    z_dim1 = *ldz;    z_offset = 1 + z_dim1;    z__ -= z_offset;    --isuppz;    --work;    --iwork;    /* Function Body */    indld = *n + 1;    indlld = (*n << 1) + 1;    indwrk = *n * 3 + 1;    minwsize = *n * 12;    i__1 = minwsize;    for (i__ = 1; i__ <= i__1; ++i__) {	work[i__] = 0.;/* L5: */    }/*     IWORK(IINDR+1:IINDR+N) hold the twist indices R for the *//*     factorization used to compute the FP vector */    iindr = 0;/*     IWORK(IINDC1+1:IINC2+N) are used to store the clusters of the current *//*     layer and the one above. */    iindc1 = *n;    iindc2 = *n << 1;    iindwk = *n * 3 + 1;    miniwsize = *n * 7;    i__1 = miniwsize;    for (i__ = 1; i__ <= i__1; ++i__) {	iwork[i__] = 0;/* L10: */    }    zusedl = 1;    if (*dol > 1) {/*        Set lower bound for use of Z */	zusedl = *dol - 1;    }    zusedu = *m;    if (*dou < *m) {/*        Set lower bound for use of Z */	zusedu = *dou + 1;    }/*     The width of the part of Z that is used */    zusedw = zusedu - zusedl + 1;    _starpu_dlaset_("Full", n, &zusedw, &c_b5, &c_b5, &z__[zusedl * z_dim1 + 1], ldz);    eps = _starpu_dlamch_("Precision");    rqtol = eps * 2.;/*     Set expert flags for standard code. */    tryrqc = TRUE_;    if (*dol == 1 && *dou == *m) {    } else {/*        Only selected eigenpairs are computed. Since the other evalues *//*        are not refined by RQ iteration, bisection has to compute to full *//*        accuracy. */	*rtol1 = eps * 4.;	*rtol2 = eps * 4.;    }/*     The entries WBEGIN:WEND in W, WERR, WGAP correspond to the *//*     desired eigenvalues. The support of the nonzero eigenvector *//*     entries is contained in the interval IBEGIN:IEND. *//*     Remark that if k eigenpairs are desired, then the eigenvectors *//*     are stored in k contiguous columns of Z. *//*     DONE is the number of eigenvectors already computed */    done = 0;    ibegin = 1;    wbegin = 1;    i__1 = iblock[*m];    for (jblk = 1; jblk <= i__1; ++jblk) {	iend = isplit[jblk];	sigma = l[iend];/*        Find the eigenvectors of the submatrix indexed IBEGIN *//*        through IEND. */	wend = wbegin - 1;L15:	if (wend < *m) {	    if (iblock[wend + 1] == jblk) {		++wend;		goto L15;	    }	}	if (wend < wbegin) {	    ibegin = iend + 1;	    goto L170;	} else if (wend < *dol || wbegin > *dou) {	    ibegin = iend + 1;	    wbegin = wend + 1;	    goto L170;	}/*        Find local spectral diameter of the block */	gl = gers[(ibegin << 1) - 1];	gu = gers[ibegin * 2];	i__2 = iend;	for (i__ = ibegin + 1; i__ <= i__2; ++i__) {/* Computing MIN */	    d__1 = gers[(i__ << 1) - 1];	    gl = min(d__1,gl);/* Computing MAX */	    d__1 = gers[i__ * 2];	    gu = max(d__1,gu);/* L20: */	}	spdiam = gu - gl;/*        OLDIEN is the last index of the previous block */	oldien = ibegin - 1;/*        Calculate the size of the current block */	in = iend - ibegin + 1;/*        The number of eigenvalues in the current block */	im = wend - wbegin + 1;/*        This is for a 1x1 block */	if (ibegin == iend) {	    ++done;	    z__[ibegin + wbegin * z_dim1] = 1.;	    isuppz[(wbegin << 1) - 1] = ibegin;	    isuppz[wbegin * 2] = ibegin;	    w[wbegin] += sigma;	    work[wbegin] = w[wbegin];	    ibegin = iend + 1;	    ++wbegin;	    goto L170;	}/*        The desired (shifted) eigenvalues are stored in W(WBEGIN:WEND) *//*        Note that these can be approximations, in this case, the corresp. *//*        entries of WERR give the size of the uncertainty interval. *//*        The eigenvalue approximations will be refined when necessary as *//*        high relative accuracy is required for the computation of the *//*        corresponding eigenvectors. */	_starpu_dcopy_(&im, &w[wbegin], &c__1, &work[wbegin], &c__1);/*        We store in W the eigenvalue approximations w.r.t. the original *//*        matrix T. */	i__2 = im;	for (i__ = 1; i__ <= i__2; ++i__) {	    w[wbegin + i__ - 1] += sigma;/* L30: */	}/*        NDEPTH is the current depth of the representation tree */	ndepth = 0;/*        PARITY is either 1 or 0 */	parity = 1;/*        NCLUS is the number of clusters for the next level of the *//*        representation tree, we start with NCLUS = 1 for the root */	nclus = 1;	iwork[iindc1 + 1] = 1;	iwork[iindc1 + 2] = im;/*        IDONE is the number of eigenvectors already computed in the current *//*        block */	idone = 0;/*        loop while( IDONE.LT.IM ) *//*        generate the representation tree for the current block and *//*        compute the eigenvectors */L40:	if (idone < im) {/*           This is a crude protection against infinitely deep trees */	    if (ndepth > *m) {		*info = -2;		return 0;	    }/*           breadth first processing of the current level of the representation *//*           tree: OLDNCL = number of clusters on current level */	    oldncl = nclus;/*           reset NCLUS to count the number of child clusters */	    nclus = 0;	    parity = 1 - parity;	    if (parity == 0) {		oldcls = iindc1;		newcls = iindc2;	    } else {		oldcls = iindc2;		newcls = iindc1;	    }/*           Process the clusters on the current level */	    i__2 = oldncl;	    for (i__ = 1; i__ <= i__2; ++i__) {		j = oldcls + (i__ << 1);/*              OLDFST, OLDLST = first, last index of current cluster. *//*                               cluster indices start with 1 and are relative *//*                               to WBEGIN when accessing W, WGAP, WERR, Z */		oldfst = iwork[j - 1];		oldlst = iwork[j];		if (ndepth > 0) {/*                 Retrieve relatively robust representation (RRR) of cluster *//*                 that has been computed at the previous level *//*                 The RRR is stored in Z and overwritten once the eigenvectors *//*                 have been computed or when the cluster is refined */		    if (*dol == 1 && *dou == *m) {/*                    Get representation from location of the leftmost evalue *//*                    of the cluster */			j = wbegin + oldfst - 1;		    } else {			if (wbegin + oldfst - 1 < *dol) {/*                       Get representation from the left end of Z array */			    j = *dol - 1;			} else if (wbegin + oldfst - 1 > *dou) {/*                       Get representation from the right end of Z array */			    j = *dou;			} else {			    j = wbegin + oldfst - 1;			}		    }		    _starpu_dcopy_(&in, &z__[ibegin + j * z_dim1], &c__1, &d__[ibegin], &c__1);		    i__3 = in - 1;		    _starpu_dcopy_(&i__3, &z__[ibegin + (j + 1) * z_dim1], &c__1, &l[			    ibegin], &c__1);		    sigma = z__[iend + (j + 1) * z_dim1];/*                 Set the corresponding entries in Z to zero */		    _starpu_dlaset_("Full", &in, &c__2, &c_b5, &c_b5, &z__[ibegin + j 			    * z_dim1], ldz);		}/*              Compute DL and DLL of current RRR */		i__3 = iend - 1;		for (j = ibegin; j <= i__3; ++j) {		    tmp = d__[j] * l[j];		    work[indld - 1 + j] = tmp;		    work[indlld - 1 + j] = tmp * l[j];/* L50: */		}		if (ndepth > 0) {/*                 P and Q are index of the first and last eigenvalue to compute *//*                 within the current block */		    p = indexw[wbegin - 1 + oldfst];		    q = indexw[wbegin - 1 + oldlst];/*                 Offset for the arrays WORK, WGAP and WERR, i.e., th P-OFFSET *//*                 thru' Q-OFFSET elements of these arrays are to be used. *//*                  OFFSET = P-OLDFST */		    offset = indexw[wbegin] - 1;/*                 perform limited bisection (if necessary) to get approximate *//*                 eigenvalues to the precision needed. */		    _starpu_dlarrb_(&in, &d__[ibegin], &work[indlld + ibegin - 1], &p, 			     &q, rtol1, rtol2, &offset, &work[wbegin], &wgap[			    wbegin], &werr[wbegin], &work[indwrk], &iwork[			    iindwk], pivmin, &spdiam, &in, &iinfo);		    if (iinfo != 0) {			*info = -1;			return 0;		    }/*                 We also recompute the extremal gaps. W holds all eigenvalues *//*                 of the unshifted matrix and must be used for computation *//*                 of WGAP, the entries of WORK might stem from RRRs with *//*                 different shifts. The gaps from WBEGIN-1+OLDFST to *//*                 WBEGIN-1+OLDLST are correctly computed in DLARRB. *//*                 However, we only allow the gaps to become greater since *//*                 this is what should happen when we decrease WERR */		    if (oldfst > 1) {/* Computing MAX */			d__1 = wgap[wbegin + oldfst - 2], d__2 = w[wbegin + 				oldfst - 1] - werr[wbegin + oldfst - 1] - w[				wbegin + oldfst - 2] - werr[wbegin + oldfst - 				2];			wgap[wbegin + oldfst - 2] = max(d__1,d__2);		    }		    if (wbegin + oldlst - 1 < wend) {/* Computing MAX */			d__1 = wgap[wbegin + oldlst - 1], d__2 = w[wbegin + 				oldlst] - werr[wbegin + oldlst] - w[wbegin + 				oldlst - 1] - werr[wbegin + oldlst - 1];			wgap[wbegin + oldlst - 1] = max(d__1,d__2);		    }/*                 Each time the eigenvalues in WORK get refined, we store *//*                 the newly found approximation with all shifts applied in W */		    i__3 = oldlst;		    for (j = oldfst; j <= i__3; ++j) {			w[wbegin + j - 1] = work[wbegin + j - 1] + sigma;/* L53: */		    }		}/*              Process the current node. */		newfst = oldfst;		i__3 = oldlst;		for (j = oldfst; j <= i__3; ++j) {		    if (j == oldlst) {/*                    we are at the right end of the cluster, this is also the *//*                    boundary of the child cluster */			newlst = j;		    } else if (wgap[wbegin + j - 1] >= *minrgp * (d__1 = work[			    wbegin + j - 1], abs(d__1))) {/*                    the right relative gap is big enough, the child cluster *//*                    (NEWFST,..,NEWLST) is well separated from the following */			newlst = j;		    } else {/*                    inside a child cluster, the relative gap is not *//*                    big enough. */			goto L140;		    }/*                 Compute size of child cluster found */		    newsiz = newlst - newfst + 1;/*                 NEWFTT is the place in Z where the new RRR or the computed *//*                 eigenvector is to be stored */		    if (*dol == 1 && *dou == *m) {/*                    Store representation at location of the leftmost evalue *//*                    of the cluster */			newftt = wbegin + newfst - 1;		    } else {			if (wbegin + newfst - 1 < *dol) {/*                       Store representation at the left end of Z array */			    newftt = *dol - 1;			} else if (wbegin + newfst - 1 > *dou) {/*                       Store representation at the right end of Z array */			    newftt = *dou;			} else {			    newftt = wbegin + newfst - 1;			}		    }		    if (newsiz > 1) {/*                    Current child is not a singleton but a cluster. *//*                    Compute and store new representation of child. *//*                    Compute left and right cluster gap. *//*                    LGAP and RGAP are not computed from WORK because *//*                    the eigenvalue approximations may stem from RRRs *//*                    different shifts. However, W hold all eigenvalues *//*                    of the unshifted matrix. Still, the entries in WGAP *//*                    have to be computed from WORK since the entries *//*                    in W might be of the same order so that gaps are not *//*                    exhibited correctly for very close eigenvalues. */			if (newfst == 1) {/* Computing MAX */			    d__1 = 0., d__2 = w[wbegin] - werr[wbegin] - *vl;			    lgap = max(d__1,d__2);			} else {			    lgap = wgap[wbegin + newfst - 2];			}			rgap = wgap[wbegin + newlst - 1];/*                    Compute left- and rightmost eigenvalue of child *//*                    to high precision in order to shift as close *//*                    as possible and obtain as large relative gaps *//*                    as possible */			for (k = 1; k <= 2; ++k) {			    if (k == 1) {				p = indexw[wbegin - 1 + newfst];			    } else {				p = indexw[wbegin - 1 + newlst];			    }			    offset = indexw[wbegin] - 1;			    _starpu_dlarrb_(&in, &d__[ibegin], &work[indlld + ibegin 				    - 1], &p, &p, &rqtol, &rqtol, &offset, &				    work[wbegin], &wgap[wbegin], &werr[wbegin], &work[indwrk], &iwork[iindwk], pivmin, &				    spdiam, &in, &iinfo);/* L55: */			}			if (wbegin + newlst - 1 < *dol || wbegin + newfst - 1 				> *dou) {/*                       if the cluster contains no desired eigenvalues *//*                       skip the computation of that branch of the rep. tree *//*                       We could skip before the refinement of the extremal *//*                       eigenvalues of the child, but then the representation *//*                       tree could be different from the one when nothing is *//*                       skipped. For this reason we skip at this place. */			    idone = idone + newlst - newfst + 1;			    goto L139;			}/*                    Compute RRR of child cluster. *//*                    Note that the new RRR is stored in Z *//*                    DLARRF needs LWORK = 2*N */			_starpu_dlarrf_(&in, &d__[ibegin], &l[ibegin], &work[indld + 				ibegin - 1], &newfst, &newlst, &work[wbegin], 				&wgap[wbegin], &werr[wbegin], &spdiam, &lgap, 				&rgap, pivmin, &tau, &z__[ibegin + newftt * 				z_dim1], &z__[ibegin + (newftt + 1) * z_dim1], 				 &work[indwrk], &iinfo);			if (iinfo == 0) {/*                       a new RRR for the cluster was found by DLARRF *//*                       update shift and store it */			    ssigma = sigma + tau;			    z__[iend + (newftt + 1) * z_dim1] = ssigma;/*                       WORK() are the midpoints and WERR() the semi-width *//*                       Note that the entries in W are unchanged. */			    i__4 = newlst;			    for (k = newfst; k <= i__4; ++k) {				fudge = eps * 3. * (d__1 = work[wbegin + k - 					1], abs(d__1));				work[wbegin + k - 1] -= tau;				fudge += eps * 4. * (d__1 = work[wbegin + k - 					1], abs(d__1));/*                          Fudge errors */				werr[wbegin + k - 1] += fudge;/*                          Gaps are not fudged. Provided that WERR is small *//*                          when eigenvalues are close, a zero gap indicates *//*                          that a new representation is needed for resolving *//*                          the cluster. A fudge could lead to a wrong decision *//*                          of judging eigenvalues 'separated' which in *//*                          reality are not. This could have a negative impact *//*                          on the orthogonality of the computed eigenvectors. *//* L116: */			    }			    ++nclus;			    k = newcls + (nclus << 1);			    iwork[k - 1] = newfst;			    iwork[k] = newlst;			} else {			    *info = -2;			    return 0;			}		    } else {/*                    Compute eigenvector of singleton */			iter = 0;			tol = log((doublereal) in) * 4. * eps;			k = newfst;			windex = wbegin + k - 1;/* Computing MAX */			i__4 = windex - 1;			windmn = max(i__4,1);/* Computing MIN */			i__4 = windex + 1;			windpl = min(i__4,*m);			lambda = work[windex];			++done;/*                    Check if eigenvector computation is to be skipped */			if (windex < *dol || windex > *dou) {			    eskip = TRUE_;			    goto L125;			} else {			    eskip = FALSE_;			}			left = work[windex] - werr[windex];			right = work[windex] + werr[windex];			indeig = indexw[windex];/*                    Note that since we compute the eigenpairs for a child, *//*                    all eigenvalue approximations are w.r.t the same shift. *//*                    In this case, the entries in WORK should be used for *//*                    computing the gaps since they exhibit even very small *//*                    differences in the eigenvalues, as opposed to the *//*                    entries in W which might "look" the same. */			if (k == 1) {/*                       In the case RANGE='I' and with not much initial *//*                       accuracy in LAMBDA and VL, the formula *//*                       LGAP = MAX( ZERO, (SIGMA - VL) + LAMBDA ) *//*                       can lead to an overestimation of the left gap and *//*                       thus to inadequately early RQI 'convergence'. *//*                       Prevent this by forcing a small left gap. *//* Computing MAX */			    d__1 = abs(left), d__2 = abs(right);			    lgap = eps * max(d__1,d__2);			} else {			    lgap = wgap[windmn];			}			if (k == im) {/*                       In the case RANGE='I' and with not much initial *//*                       accuracy in LAMBDA and VU, the formula *//*                       can lead to an overestimation of the right gap and *//*                       thus to inadequately early RQI 'convergence'. *//*                       Prevent this by forcing a small right gap. *//* Computing MAX */			    d__1 = abs(left), d__2 = abs(right);			    rgap = eps * max(d__1,d__2);			} else {			    rgap = wgap[windex];			}			gap = min(lgap,rgap);			if (k == 1 || k == im) {/*                       The eigenvector support can become wrong *//*                       because significant entries could be cut off due to a *//*                       large GAPTOL parameter in LAR1V. Prevent this. */			    gaptol = 0.;			} else {			    gaptol = gap * eps;			}			isupmn = in;			isupmx = 1;/*                    Update WGAP so that it holds the minimum gap *//*                    to the left or the right. This is crucial in the *//*                    case where bisection is used to ensure that the *//*                    eigenvalue is refined up to the required precision. *//*                    The correct value is restored afterwards. */			savgap = wgap[windex];			wgap[windex] = gap;/*                    We want to use the Rayleigh Quotient Correction *//*                    as often as possible since it converges quadratically *//*                    when we are close enough to the desired eigenvalue. *//*                    However, the Rayleigh Quotient can have the wrong sign *//*                    and lead us away from the desired eigenvalue. In this *//*                    case, the best we can do is to use bisection. */			usedbs = FALSE_;			usedrq = FALSE_;/*                    Bisection is initially turned off unless it is forced */			needbs = ! tryrqc;L120:/*                    Check if bisection should be used to refine eigenvalue */			if (needbs) {/*                       Take the bisection as new iterate */			    usedbs = TRUE_;			    itmp1 = iwork[iindr + windex];			    offset = indexw[wbegin] - 1;			    d__1 = eps * 2.;			    _starpu_dlarrb_(&in, &d__[ibegin], &work[indlld + ibegin 				    - 1], &indeig, &indeig, &c_b5, &d__1, &				    offset, &work[wbegin], &wgap[wbegin], &				    werr[wbegin], &work[indwrk], &iwork[				    iindwk], pivmin, &spdiam, &itmp1, &iinfo);			    if (iinfo != 0) {				*info = -3;				return 0;			    }			    lambda = work[windex];/*                       Reset twist index from inaccurate LAMBDA to *//*                       force computation of true MINGMA */			    iwork[iindr + windex] = 0;			}/*                    Given LAMBDA, compute the eigenvector. */			L__1 = ! usedbs;			_starpu_dlar1v_(&in, &c__1, &in, &lambda, &d__[ibegin], &l[				ibegin], &work[indld + ibegin - 1], &work[				indlld + ibegin - 1], pivmin, &gaptol, &z__[				ibegin + windex * z_dim1], &L__1, &negcnt, &				ztz, &mingma, &iwork[iindr + windex], &isuppz[				(windex << 1) - 1], &nrminv, &resid, &rqcorr, 				&work[indwrk]);			if (iter == 0) {			    bstres = resid;			    bstw = lambda;			} else if (resid < bstres) {			    bstres = resid;			    bstw = lambda;			}/* Computing MIN */			i__4 = isupmn, i__5 = isuppz[(windex << 1) - 1];			isupmn = min(i__4,i__5);/* Computing MAX */			i__4 = isupmx, i__5 = isuppz[windex * 2];			isupmx = max(i__4,i__5);			++iter;/*                    sin alpha <= |resid|/gap *//*                    Note that both the residual and the gap are *//*                    proportional to the matrix, so ||T|| doesn't play *//*                    a role in the quotient *//*                    Convergence test for Rayleigh-Quotient iteration *//*                    (omitted when Bisection has been used) */			if (resid > tol * gap && abs(rqcorr) > rqtol * abs(				lambda) && ! usedbs) {/*                       We need to check that the RQCORR update doesn't *//*                       move the eigenvalue away from the desired one and *//*                       towards a neighbor. -> protection with bisection */			    if (indeig <= negcnt) {/*                          The wanted eigenvalue lies to the left */				sgndef = -1.;			    } else {/*                          The wanted eigenvalue lies to the right */				sgndef = 1.;			    }/*                       We only use the RQCORR if it improves the *//*                       the iterate reasonably. */			    if (rqcorr * sgndef >= 0. && lambda + rqcorr <= 				    right && lambda + rqcorr >= left) {				usedrq = TRUE_;/*                          Store new midpoint of bisection interval in WORK */				if (sgndef == 1.) {/*                             The current LAMBDA is on the left of the true *//*                             eigenvalue */				    left = lambda;/*                             We prefer to assume that the error estimate *//*                             is correct. We could make the interval not *//*                             as a bracket but to be modified if the RQCORR *//*                             chooses to. In this case, the RIGHT side should *//*                             be modified as follows: *//*                              RIGHT = MAX(RIGHT, LAMBDA + RQCORR) */				} else {/*                             The current LAMBDA is on the right of the true *//*                             eigenvalue */				    right = lambda;/*                             See comment about assuming the error estimate is *//*                             correct above. *//*                              LEFT = MIN(LEFT, LAMBDA + RQCORR) */				}				work[windex] = (right + left) * .5;/*                          Take RQCORR since it has the correct sign and *//*                          improves the iterate reasonably */				lambda += rqcorr;/*                          Update width of error interval */				werr[windex] = (right - left) * .5;			    } else {				needbs = TRUE_;			    }			    if (right - left < rqtol * abs(lambda)) {/*                             The eigenvalue is computed to bisection accuracy *//*                             compute eigenvector and stop */				usedbs = TRUE_;				goto L120;			    } else if (iter < 10) {				goto L120;			    } else if (iter == 10) {				needbs = TRUE_;				goto L120;			    } else {				*info = 5;				return 0;			    }			} else {			    stp2ii = FALSE_;			    if (usedrq && usedbs && bstres <= resid) {				lambda = bstw;				stp2ii = TRUE_;			    }			    if (stp2ii) {/*                          improve error angle by second step */				L__1 = ! usedbs;				_starpu_dlar1v_(&in, &c__1, &in, &lambda, &d__[ibegin], &l[ibegin], &work[indld + ibegin - 					1], &work[indlld + ibegin - 1], 					pivmin, &gaptol, &z__[ibegin + windex 					* z_dim1], &L__1, &negcnt, &ztz, &					mingma, &iwork[iindr + windex], &					isuppz[(windex << 1) - 1], &nrminv, &					resid, &rqcorr, &work[indwrk]);			    }			    work[windex] = lambda;			}/*                    Compute FP-vector support w.r.t. whole matrix */			isuppz[(windex << 1) - 1] += oldien;			isuppz[windex * 2] += oldien;			zfrom = isuppz[(windex << 1) - 1];			zto = isuppz[windex * 2];			isupmn += oldien;			isupmx += oldien;/*                    Ensure vector is ok if support in the RQI has changed */			if (isupmn < zfrom) {			    i__4 = zfrom - 1;			    for (ii = isupmn; ii <= i__4; ++ii) {				z__[ii + windex * z_dim1] = 0.;/* L122: */			    }			}			if (isupmx > zto) {			    i__4 = isupmx;			    for (ii = zto + 1; ii <= i__4; ++ii) {				z__[ii + windex * z_dim1] = 0.;/* L123: */			    }			}			i__4 = zto - zfrom + 1;			_starpu_dscal_(&i__4, &nrminv, &z__[zfrom + windex * z_dim1], 				&c__1);L125:/*                    Update W */			w[windex] = lambda + sigma;/*                    Recompute the gaps on the left and right *//*                    But only allow them to become larger and not *//*                    smaller (which can only happen through "bad" *//*                    cancellation and doesn't reflect the theory *//*                    where the initial gaps are underestimated due *//*                    to WERR being too crude.) */			if (! eskip) {			    if (k > 1) {/* Computing MAX */				d__1 = wgap[windmn], d__2 = w[windex] - werr[					windex] - w[windmn] - werr[windmn];				wgap[windmn] = max(d__1,d__2);			    }			    if (windex < wend) {/* Computing MAX */				d__1 = savgap, d__2 = w[windpl] - werr[windpl]					 - w[windex] - werr[windex];				wgap[windex] = max(d__1,d__2);			    }			}			++idone;		    }/*                 here ends the code for the current child */L139:/*                 Proceed to any remaining child nodes */		    newfst = j + 1;L140:		    ;		}/* L150: */	    }	    ++ndepth;	    goto L40;	}	ibegin = iend + 1;	wbegin = wend + 1;L170:	;    }    return 0;/*     End of DLARRV */} /* _starpu_dlarrv_ */
 |