| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797 | 
							- /* dgesvj.f -- translated by f2c (version 20061008).
 
-    You must link the resulting object file with libf2c:
 
- 	on Microsoft Windows system, link with libf2c.lib;
 
- 	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
 
- 	or, if you install libf2c.a in a standard place, with -lf2c -lm
 
- 	-- in that order, at the end of the command line, as in
 
- 		cc *.o -lf2c -lm
 
- 	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
 
- 		http://www.netlib.org/f2c/libf2c.zip
 
- */
 
- #include "f2c.h"
 
- #include "blaswrap.h"
 
- /* Table of constant values */
 
- static doublereal c_b17 = 0.;
 
- static doublereal c_b18 = 1.;
 
- static integer c__1 = 1;
 
- static integer c__0 = 0;
 
- static integer c__2 = 2;
 
- /* Subroutine */ int _starpu_dgesvj_(char *joba, char *jobu, char *jobv, integer *m, 
 
- 	integer *n, doublereal *a, integer *lda, doublereal *sva, integer *mv, 
 
- 	 doublereal *v, integer *ldv, doublereal *work, integer *lwork, 
 
- 	integer *info)
 
- {
 
-     /* System generated locals */
 
-     integer a_dim1, a_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5;
 
-     doublereal d__1, d__2;
 
-     /* Builtin functions */
 
-     double sqrt(doublereal), d_sign(doublereal *, doublereal *);
 
-     /* Local variables */
 
-     doublereal bigtheta;
 
-     integer pskipped, i__, p, q;
 
-     doublereal t;
 
-     integer n2, n4;
 
-     doublereal rootsfmin;
 
-     integer n34;
 
-     doublereal cs, sn;
 
-     integer ir1, jbc;
 
-     doublereal big;
 
-     integer kbl, igl, ibr, jgl, nbl;
 
-     doublereal tol;
 
-     integer mvl;
 
-     doublereal aapp, aapq, aaqq;
 
-     extern doublereal _starpu_ddot_(integer *, doublereal *, integer *, doublereal *, 
 
- 	    integer *);
 
-     doublereal ctol;
 
-     integer ierr;
 
-     doublereal aapp0;
 
-     extern doublereal _starpu_dnrm2_(integer *, doublereal *, integer *);
 
-     doublereal temp1;
 
-     extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, 
 
- 	    integer *);
 
-     doublereal scale, large, apoaq, aqoap;
 
-     extern logical _starpu_lsame_(char *, char *);
 
-     doublereal theta, small, sfmin;
 
-     logical lsvec;
 
-     extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *, 
 
- 	    doublereal *, integer *);
 
-     doublereal fastr[5];
 
-     extern /* Subroutine */ int _starpu_dswap_(integer *, doublereal *, integer *, 
 
- 	    doublereal *, integer *);
 
-     logical applv, rsvec;
 
-     extern /* Subroutine */ int _starpu_daxpy_(integer *, doublereal *, doublereal *, 
 
- 	    integer *, doublereal *, integer *);
 
-     logical uctol;
 
-     extern /* Subroutine */ int _starpu_drotm_(integer *, doublereal *, integer *, 
 
- 	    doublereal *, integer *, doublereal *);
 
-     logical lower, upper, rotok;
 
-     extern /* Subroutine */ int _starpu_dgsvj0_(char *, integer *, integer *, 
 
- 	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
 
- 	    doublereal *, integer *, doublereal *, doublereal *, doublereal *, 
 
- 	     integer *, doublereal *, integer *, integer *), _starpu_dgsvj1_(
 
- 	    char *, integer *, integer *, integer *, doublereal *, integer *, 
 
- 	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
 
- 	    doublereal *, doublereal *, doublereal *, integer *, doublereal *, 
 
- 	     integer *, integer *);
 
-     extern doublereal _starpu_dlamch_(char *);
 
-     extern /* Subroutine */ int _starpu_dlascl_(char *, integer *, integer *, 
 
- 	    doublereal *, doublereal *, integer *, integer *, doublereal *, 
 
- 	    integer *, integer *);
 
-     extern integer _starpu_idamax_(integer *, doublereal *, integer *);
 
-     extern /* Subroutine */ int _starpu_dlaset_(char *, integer *, integer *, 
 
- 	    doublereal *, doublereal *, doublereal *, integer *), 
 
- 	    _starpu_xerbla_(char *, integer *);
 
-     integer ijblsk, swband, blskip;
 
-     doublereal mxaapq;
 
-     extern /* Subroutine */ int _starpu_dlassq_(integer *, doublereal *, integer *, 
 
- 	    doublereal *, doublereal *);
 
-     doublereal thsign, mxsinj;
 
-     integer emptsw, notrot, iswrot, lkahead;
 
-     logical goscale, noscale;
 
-     doublereal rootbig, epsilon, rooteps;
 
-     integer rowskip;
 
-     doublereal roottol;
 
- /*  -- LAPACK routine (version 3.2)                                    -- */
 
- /*  -- Contributed by Zlatko Drmac of the University of Zagreb and     -- */
 
- /*  -- Kresimir Veselic of the Fernuniversitaet Hagen                  -- */
 
- /*  -- November 2008                                                   -- */
 
- /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
 
- /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
 
- /* This routine is also part of SIGMA (version 1.23, October 23. 2008.) */
 
- /* SIGMA is a library of algorithms for highly accurate algorithms for */
 
- /* computation of SVD, PSVD, QSVD, (H,K)-SVD, and for solution of the */
 
- /* eigenvalue problems Hx = lambda M x, H M x = lambda x with H, M > 0. */
 
- /*     -#- Scalar Arguments -#- */
 
- /*     -#- Array Arguments -#- */
 
- /*     .. */
 
- /*  Purpose */
 
- /*  ~~~~~~~ */
 
- /*  DGESVJ computes the singular value decomposition (SVD) of a real */
 
- /*  M-by-N matrix A, where M >= N. The SVD of A is written as */
 
- /*                                     [++]   [xx]   [x0]   [xx] */
 
- /*               A = U * SIGMA * V^t,  [++] = [xx] * [ox] * [xx] */
 
- /*                                     [++]   [xx] */
 
- /*  where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal */
 
- /*  matrix, and V is an N-by-N orthogonal matrix. The diagonal elements */
 
- /*  of SIGMA are the singular values of A. The columns of U and V are the */
 
- /*  left and the right singular vectors of A, respectively. */
 
- /*  Further Details */
 
- /*  ~~~~~~~~~~~~~~~ */
 
- /*  The orthogonal N-by-N matrix V is obtained as a product of Jacobi plane */
 
- /*  rotations. The rotations are implemented as fast scaled rotations of */
 
- /*  Anda and Park [1]. In the case of underflow of the Jacobi angle, a */
 
- /*  modified Jacobi transformation of Drmac [4] is used. Pivot strategy uses */
 
- /*  column interchanges of de Rijk [2]. The relative accuracy of the computed */
 
- /*  singular values and the accuracy of the computed singular vectors (in */
 
- /*  angle metric) is as guaranteed by the theory of Demmel and Veselic [3]. */
 
- /*  The condition number that determines the accuracy in the full rank case */
 
- /*  is essentially min_{D=diag} kappa(A*D), where kappa(.) is the */
 
- /*  spectral condition number. The best performance of this Jacobi SVD */
 
- /*  procedure is achieved if used in an  accelerated version of Drmac and */
 
- /*  Veselic [5,6], and it is the kernel routine in the SIGMA library [7]. */
 
- /*  Some tunning parameters (marked with [TP]) are available for the */
 
- /*  implementer. */
 
- /*  The computational range for the nonzero singular values is the  machine */
 
- /*  number interval ( UNDERFLOW , OVERFLOW ). In extreme cases, even */
 
- /*  denormalized singular values can be computed with the corresponding */
 
- /*  gradual loss of accurate digits. */
 
- /*  Contributors */
 
- /*  ~~~~~~~~~~~~ */
 
- /*  Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) */
 
- /*  References */
 
- /*  ~~~~~~~~~~ */
 
- /* [1] A. A. Anda and H. Park: Fast plane rotations with dynamic scaling. */
 
- /*     SIAM J. matrix Anal. Appl., Vol. 15 (1994), pp. 162-174. */
 
- /* [2] P. P. M. De Rijk: A one-sided Jacobi algorithm for computing the */
 
- /*     singular value decomposition on a vector computer. */
 
- /*     SIAM J. Sci. Stat. Comp., Vol. 10 (1998), pp. 359-371. */
 
- /* [3] J. Demmel and K. Veselic: Jacobi method is more accurate than QR. */
 
- /* [4] Z. Drmac: Implementation of Jacobi rotations for accurate singular */
 
- /*     value computation in floating point arithmetic. */
 
- /*     SIAM J. Sci. Comp., Vol. 18 (1997), pp. 1200-1222. */
 
- /* [5] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. */
 
- /*     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. */
 
- /*     LAPACK Working note 169. */
 
- /* [6] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. */
 
- /*     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. */
 
- /*     LAPACK Working note 170. */
 
- /* [7] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, */
 
- /*     QSVD, (H,K)-SVD computations. */
 
- /*     Department of Mathematics, University of Zagreb, 2008. */
 
- /*  Bugs, Examples and Comments */
 
- /*  ~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
 
- /*  Please report all bugs and send interesting test examples and comments to */
 
- /*  drmac@math.hr. Thank you. */
 
- /*  Arguments */
 
- /*  ~~~~~~~~~ */
 
- /*  JOBA    (input) CHARACTER* 1 */
 
- /*          Specifies the structure of A. */
 
- /*          = 'L': The input matrix A is lower triangular; */
 
- /*          = 'U': The input matrix A is upper triangular; */
 
- /*          = 'G': The input matrix A is general M-by-N matrix, M >= N. */
 
- /*  JOBU    (input) CHARACTER*1 */
 
- /*          Specifies whether to compute the left singular vectors */
 
- /*          (columns of U): */
 
- /*          = 'U': The left singular vectors corresponding to the nonzero */
 
- /*                 singular values are computed and returned in the leading */
 
- /*                 columns of A. See more details in the description of A. */
 
- /*                 The default numerical orthogonality threshold is set to */
 
- /*                 approximately TOL=CTOL*EPS, CTOL=DSQRT(M), EPS=DLAMCH('E'). */
 
- /*          = 'C': Analogous to JOBU='U', except that user can control the */
 
- /*                 level of numerical orthogonality of the computed left */
 
- /*                 singular vectors. TOL can be set to TOL = CTOL*EPS, where */
 
- /*                 CTOL is given on input in the array WORK. */
 
- /*                 No CTOL smaller than ONE is allowed. CTOL greater */
 
- /*                 than 1 / EPS is meaningless. The option 'C' */
 
- /*                 can be used if M*EPS is satisfactory orthogonality */
 
- /*                 of the computed left singular vectors, so CTOL=M could */
 
- /*                 save few sweeps of Jacobi rotations. */
 
- /*                 See the descriptions of A and WORK(1). */
 
- /*          = 'N': The matrix U is not computed. However, see the */
 
- /*                 description of A. */
 
- /*  JOBV    (input) CHARACTER*1 */
 
- /*          Specifies whether to compute the right singular vectors, that */
 
- /*          is, the matrix V: */
 
- /*          = 'V' : the matrix V is computed and returned in the array V */
 
- /*          = 'A' : the Jacobi rotations are applied to the MV-by-N */
 
- /*                  array V. In other words, the right singular vector */
 
- /*                  matrix V is not computed explicitly, instead it is */
 
- /*                  applied to an MV-by-N matrix initially stored in the */
 
- /*                  first MV rows of V. */
 
- /*          = 'N' : the matrix V is not computed and the array V is not */
 
- /*                  referenced */
 
- /*  M       (input) INTEGER */
 
- /*          The number of rows of the input matrix A.  M >= 0. */
 
- /*  N       (input) INTEGER */
 
- /*          The number of columns of the input matrix A. */
 
- /*          M >= N >= 0. */
 
- /*  A       (input/output) REAL array, dimension (LDA,N) */
 
- /*          On entry, the M-by-N matrix A. */
 
- /*          On exit, */
 
- /*          If JOBU .EQ. 'U' .OR. JOBU .EQ. 'C': */
 
- /*          ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
 
- /*                 If INFO .EQ. 0, */
 
- /*                 ~~~~~~~~~~~~~~~ */
 
- /*                 RANKA orthonormal columns of U are returned in the */
 
- /*                 leading RANKA columns of the array A. Here RANKA <= N */
 
- /*                 is the number of computed singular values of A that are */
 
- /*                 above the underflow threshold DLAMCH('S'). The singular */
 
- /*                 vectors corresponding to underflowed or zero singular */
 
- /*                 values are not computed. The value of RANKA is returned */
 
- /*                 in the array WORK as RANKA=NINT(WORK(2)). Also see the */
 
- /*                 descriptions of SVA and WORK. The computed columns of U */
 
- /*                 are mutually numerically orthogonal up to approximately */
 
- /*                 TOL=DSQRT(M)*EPS (default); or TOL=CTOL*EPS (JOBU.EQ.'C'), */
 
- /*                 see the description of JOBU. */
 
- /*                 If INFO .GT. 0, */
 
- /*                 ~~~~~~~~~~~~~~~ */
 
- /*                 the procedure DGESVJ did not converge in the given number */
 
- /*                 of iterations (sweeps). In that case, the computed */
 
- /*                 columns of U may not be orthogonal up to TOL. The output */
 
- /*                 U (stored in A), SIGMA (given by the computed singular */
 
- /*                 values in SVA(1:N)) and V is still a decomposition of the */
 
- /*                 input matrix A in the sense that the residual */
 
- /*                 ||A-SCALE*U*SIGMA*V^T||_2 / ||A||_2 is small. */
 
- /*          If JOBU .EQ. 'N': */
 
- /*          ~~~~~~~~~~~~~~~~~ */
 
- /*                 If INFO .EQ. 0 */
 
- /*                 ~~~~~~~~~~~~~~ */
 
- /*                 Note that the left singular vectors are 'for free' in the */
 
- /*                 one-sided Jacobi SVD algorithm. However, if only the */
 
- /*                 singular values are needed, the level of numerical */
 
- /*                 orthogonality of U is not an issue and iterations are */
 
- /*                 stopped when the columns of the iterated matrix are */
 
- /*                 numerically orthogonal up to approximately M*EPS. Thus, */
 
- /*                 on exit, A contains the columns of U scaled with the */
 
- /*                 corresponding singular values. */
 
- /*                 If INFO .GT. 0, */
 
- /*                 ~~~~~~~~~~~~~~~ */
 
- /*                 the procedure DGESVJ did not converge in the given number */
 
- /*                 of iterations (sweeps). */
 
- /*  LDA     (input) INTEGER */
 
- /*          The leading dimension of the array A.  LDA >= max(1,M). */
 
- /*  SVA     (workspace/output) REAL array, dimension (N) */
 
- /*          On exit, */
 
- /*          If INFO .EQ. 0, */
 
- /*          ~~~~~~~~~~~~~~~ */
 
- /*          depending on the value SCALE = WORK(1), we have: */
 
- /*                 If SCALE .EQ. ONE: */
 
- /*                 ~~~~~~~~~~~~~~~~~~ */
 
- /*                 SVA(1:N) contains the computed singular values of A. */
 
- /*                 During the computation SVA contains the Euclidean column */
 
- /*                 norms of the iterated matrices in the array A. */
 
- /*                 If SCALE .NE. ONE: */
 
- /*                 ~~~~~~~~~~~~~~~~~~ */
 
- /*                 The singular values of A are SCALE*SVA(1:N), and this */
 
- /*                 factored representation is due to the fact that some of the */
 
- /*                 singular values of A might underflow or overflow. */
 
- /*          If INFO .GT. 0, */
 
- /*          ~~~~~~~~~~~~~~~ */
 
- /*          the procedure DGESVJ did not converge in the given number of */
 
- /*          iterations (sweeps) and SCALE*SVA(1:N) may not be accurate. */
 
- /*  MV      (input) INTEGER */
 
- /*          If JOBV .EQ. 'A', then the product of Jacobi rotations in DGESVJ */
 
- /*          is applied to the first MV rows of V. See the description of JOBV. */
 
- /*  V       (input/output) REAL array, dimension (LDV,N) */
 
- /*          If JOBV = 'V', then V contains on exit the N-by-N matrix of */
 
- /*                         the right singular vectors; */
 
- /*          If JOBV = 'A', then V contains the product of the computed right */
 
- /*                         singular vector matrix and the initial matrix in */
 
- /*                         the array V. */
 
- /*          If JOBV = 'N', then V is not referenced. */
 
- /*  LDV     (input) INTEGER */
 
- /*          The leading dimension of the array V, LDV .GE. 1. */
 
- /*          If JOBV .EQ. 'V', then LDV .GE. max(1,N). */
 
- /*          If JOBV .EQ. 'A', then LDV .GE. max(1,MV) . */
 
- /*  WORK    (input/workspace/output) REAL array, dimension max(4,M+N). */
 
- /*          On entry, */
 
- /*          If JOBU .EQ. 'C', */
 
- /*          ~~~~~~~~~~~~~~~~~ */
 
- /*          WORK(1) = CTOL, where CTOL defines the threshold for convergence. */
 
- /*                    The process stops if all columns of A are mutually */
 
- /*                    orthogonal up to CTOL*EPS, EPS=DLAMCH('E'). */
 
- /*                    It is required that CTOL >= ONE, i.e. it is not */
 
- /*                    allowed to force the routine to obtain orthogonality */
 
- /*                    below EPSILON. */
 
- /*          On exit, */
 
- /*          WORK(1) = SCALE is the scaling factor such that SCALE*SVA(1:N) */
 
- /*                    are the computed singular vcalues of A. */
 
- /*                    (See description of SVA().) */
 
- /*          WORK(2) = NINT(WORK(2)) is the number of the computed nonzero */
 
- /*                    singular values. */
 
- /*          WORK(3) = NINT(WORK(3)) is the number of the computed singular */
 
- /*                    values that are larger than the underflow threshold. */
 
- /*          WORK(4) = NINT(WORK(4)) is the number of sweeps of Jacobi */
 
- /*                    rotations needed for numerical convergence. */
 
- /*          WORK(5) = max_{i.NE.j} |COS(A(:,i),A(:,j))| in the last sweep. */
 
- /*                    This is useful information in cases when DGESVJ did */
 
- /*                    not converge, as it can be used to estimate whether */
 
- /*                    the output is stil useful and for post festum analysis. */
 
- /*          WORK(6) = the largest absolute value over all sines of the */
 
- /*                    Jacobi rotation angles in the last sweep. It can be */
 
- /*                    useful for a post festum analysis. */
 
- /*  LWORK   length of WORK, WORK >= MAX(6,M+N) */
 
- /*  INFO    (output) INTEGER */
 
- /*          = 0 : successful exit. */
 
- /*          < 0 : if INFO = -i, then the i-th argument had an illegal value */
 
- /*          > 0 : DGESVJ did not converge in the maximal allowed number (30) */
 
- /*                of sweeps. The output may still be useful. See the */
 
- /*                description of WORK. */
 
- /*     Local Parameters */
 
- /*     Local Scalars */
 
- /*     Local Arrays */
 
- /*     Intrinsic Functions */
 
- /*     External Functions */
 
- /*     .. from BLAS */
 
- /*     .. from LAPACK */
 
- /*     External Subroutines */
 
- /*     .. from BLAS */
 
- /*     .. from LAPACK */
 
- /*     Test the input arguments */
 
-     /* Parameter adjustments */
 
-     --sva;
 
-     a_dim1 = *lda;
 
-     a_offset = 1 + a_dim1;
 
-     a -= a_offset;
 
-     v_dim1 = *ldv;
 
-     v_offset = 1 + v_dim1;
 
-     v -= v_offset;
 
-     --work;
 
-     /* Function Body */
 
-     lsvec = _starpu_lsame_(jobu, "U");
 
-     uctol = _starpu_lsame_(jobu, "C");
 
-     rsvec = _starpu_lsame_(jobv, "V");
 
-     applv = _starpu_lsame_(jobv, "A");
 
-     upper = _starpu_lsame_(joba, "U");
 
-     lower = _starpu_lsame_(joba, "L");
 
-     if (! (upper || lower || _starpu_lsame_(joba, "G"))) {
 
- 	*info = -1;
 
-     } else if (! (lsvec || uctol || _starpu_lsame_(jobu, "N"))) 
 
- 	    {
 
- 	*info = -2;
 
-     } else if (! (rsvec || applv || _starpu_lsame_(jobv, "N"))) 
 
- 	    {
 
- 	*info = -3;
 
-     } else if (*m < 0) {
 
- 	*info = -4;
 
-     } else if (*n < 0 || *n > *m) {
 
- 	*info = -5;
 
-     } else if (*lda < *m) {
 
- 	*info = -7;
 
-     } else if (*mv < 0) {
 
- 	*info = -9;
 
-     } else if (rsvec && *ldv < *n || applv && *ldv < *mv) {
 
- 	*info = -11;
 
-     } else if (uctol && work[1] <= 1.) {
 
- 	*info = -12;
 
-     } else /* if(complicated condition) */ {
 
- /* Computing MAX */
 
- 	i__1 = *m + *n;
 
- 	if (*lwork < max(i__1,6)) {
 
- 	    *info = -13;
 
- 	} else {
 
- 	    *info = 0;
 
- 	}
 
-     }
 
- /*     #:( */
 
-     if (*info != 0) {
 
- 	i__1 = -(*info);
 
- 	_starpu_xerbla_("DGESVJ", &i__1);
 
- 	return 0;
 
-     }
 
- /* #:) Quick return for void matrix */
 
-     if (*m == 0 || *n == 0) {
 
- 	return 0;
 
-     }
 
- /*     Set numerical parameters */
 
- /*     The stopping criterion for Jacobi rotations is */
 
- /*     max_{i<>j}|A(:,i)^T * A(:,j)|/(||A(:,i)||*||A(:,j)||) < CTOL*EPS */
 
- /*     where EPS is the round-off and CTOL is defined as follows: */
 
-     if (uctol) {
 
- /*        ... user controlled */
 
- 	ctol = work[1];
 
-     } else {
 
- /*        ... default */
 
- 	if (lsvec || rsvec || applv) {
 
- 	    ctol = sqrt((doublereal) (*m));
 
- 	} else {
 
- 	    ctol = (doublereal) (*m);
 
- 	}
 
-     }
 
- /*     ... and the machine dependent parameters are */
 
- /* [!]  (Make sure that DLAMCH() works properly on the target machine.) */
 
-     epsilon = _starpu_dlamch_("Epsilon");
 
-     rooteps = sqrt(epsilon);
 
-     sfmin = _starpu_dlamch_("SafeMinimum");
 
-     rootsfmin = sqrt(sfmin);
 
-     small = sfmin / epsilon;
 
-     big = _starpu_dlamch_("Overflow");
 
- /*     BIG         = ONE    / SFMIN */
 
-     rootbig = 1. / rootsfmin;
 
-     large = big / sqrt((doublereal) (*m * *n));
 
-     bigtheta = 1. / rooteps;
 
-     tol = ctol * epsilon;
 
-     roottol = sqrt(tol);
 
-     if ((doublereal) (*m) * epsilon >= 1.) {
 
- 	*info = -5;
 
- 	i__1 = -(*info);
 
- 	_starpu_xerbla_("DGESVJ", &i__1);
 
- 	return 0;
 
-     }
 
- /*     Initialize the right singular vector matrix. */
 
-     if (rsvec) {
 
- 	mvl = *n;
 
- 	_starpu_dlaset_("A", &mvl, n, &c_b17, &c_b18, &v[v_offset], ldv);
 
-     } else if (applv) {
 
- 	mvl = *mv;
 
-     }
 
-     rsvec = rsvec || applv;
 
- /*     Initialize SVA( 1:N ) = ( ||A e_i||_2, i = 1:N ) */
 
- /* (!)  If necessary, scale A to protect the largest singular value */
 
- /*     from overflow. It is possible that saving the largest singular */
 
- /*     value destroys the information about the small ones. */
 
- /*     This initial scaling is almost minimal in the sense that the */
 
- /*     goal is to make sure that no column norm overflows, and that */
 
- /*     DSQRT(N)*max_i SVA(i) does not overflow. If INFinite entries */
 
- /*     in A are detected, the procedure returns with INFO=-6. */
 
-     scale = 1. / sqrt((doublereal) (*m) * (doublereal) (*n));
 
-     noscale = TRUE_;
 
-     goscale = TRUE_;
 
-     if (lower) {
 
- /*        the input matrix is M-by-N lower triangular (trapezoidal) */
 
- 	i__1 = *n;
 
- 	for (p = 1; p <= i__1; ++p) {
 
- 	    aapp = 0.;
 
- 	    aaqq = 0.;
 
- 	    i__2 = *m - p + 1;
 
- 	    _starpu_dlassq_(&i__2, &a[p + p * a_dim1], &c__1, &aapp, &aaqq);
 
- 	    if (aapp > big) {
 
- 		*info = -6;
 
- 		i__2 = -(*info);
 
- 		_starpu_xerbla_("DGESVJ", &i__2);
 
- 		return 0;
 
- 	    }
 
- 	    aaqq = sqrt(aaqq);
 
- 	    if (aapp < big / aaqq && noscale) {
 
- 		sva[p] = aapp * aaqq;
 
- 	    } else {
 
- 		noscale = FALSE_;
 
- 		sva[p] = aapp * (aaqq * scale);
 
- 		if (goscale) {
 
- 		    goscale = FALSE_;
 
- 		    i__2 = p - 1;
 
- 		    for (q = 1; q <= i__2; ++q) {
 
- 			sva[q] *= scale;
 
- /* L1873: */
 
- 		    }
 
- 		}
 
- 	    }
 
- /* L1874: */
 
- 	}
 
-     } else if (upper) {
 
- /*        the input matrix is M-by-N upper triangular (trapezoidal) */
 
- 	i__1 = *n;
 
- 	for (p = 1; p <= i__1; ++p) {
 
- 	    aapp = 0.;
 
- 	    aaqq = 0.;
 
- 	    _starpu_dlassq_(&p, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq);
 
- 	    if (aapp > big) {
 
- 		*info = -6;
 
- 		i__2 = -(*info);
 
- 		_starpu_xerbla_("DGESVJ", &i__2);
 
- 		return 0;
 
- 	    }
 
- 	    aaqq = sqrt(aaqq);
 
- 	    if (aapp < big / aaqq && noscale) {
 
- 		sva[p] = aapp * aaqq;
 
- 	    } else {
 
- 		noscale = FALSE_;
 
- 		sva[p] = aapp * (aaqq * scale);
 
- 		if (goscale) {
 
- 		    goscale = FALSE_;
 
- 		    i__2 = p - 1;
 
- 		    for (q = 1; q <= i__2; ++q) {
 
- 			sva[q] *= scale;
 
- /* L2873: */
 
- 		    }
 
- 		}
 
- 	    }
 
- /* L2874: */
 
- 	}
 
-     } else {
 
- /*        the input matrix is M-by-N general dense */
 
- 	i__1 = *n;
 
- 	for (p = 1; p <= i__1; ++p) {
 
- 	    aapp = 0.;
 
- 	    aaqq = 0.;
 
- 	    _starpu_dlassq_(m, &a[p * a_dim1 + 1], &c__1, &aapp, &aaqq);
 
- 	    if (aapp > big) {
 
- 		*info = -6;
 
- 		i__2 = -(*info);
 
- 		_starpu_xerbla_("DGESVJ", &i__2);
 
- 		return 0;
 
- 	    }
 
- 	    aaqq = sqrt(aaqq);
 
- 	    if (aapp < big / aaqq && noscale) {
 
- 		sva[p] = aapp * aaqq;
 
- 	    } else {
 
- 		noscale = FALSE_;
 
- 		sva[p] = aapp * (aaqq * scale);
 
- 		if (goscale) {
 
- 		    goscale = FALSE_;
 
- 		    i__2 = p - 1;
 
- 		    for (q = 1; q <= i__2; ++q) {
 
- 			sva[q] *= scale;
 
- /* L3873: */
 
- 		    }
 
- 		}
 
- 	    }
 
- /* L3874: */
 
- 	}
 
-     }
 
-     if (noscale) {
 
- 	scale = 1.;
 
-     }
 
- /*     Move the smaller part of the spectrum from the underflow threshold */
 
- /* (!)  Start by determining the position of the nonzero entries of the */
 
- /*     array SVA() relative to ( SFMIN, BIG ). */
 
-     aapp = 0.;
 
-     aaqq = big;
 
-     i__1 = *n;
 
-     for (p = 1; p <= i__1; ++p) {
 
- 	if (sva[p] != 0.) {
 
- /* Computing MIN */
 
- 	    d__1 = aaqq, d__2 = sva[p];
 
- 	    aaqq = min(d__1,d__2);
 
- 	}
 
- /* Computing MAX */
 
- 	d__1 = aapp, d__2 = sva[p];
 
- 	aapp = max(d__1,d__2);
 
- /* L4781: */
 
-     }
 
- /* #:) Quick return for zero matrix */
 
-     if (aapp == 0.) {
 
- 	if (lsvec) {
 
- 	    _starpu_dlaset_("G", m, n, &c_b17, &c_b18, &a[a_offset], lda);
 
- 	}
 
- 	work[1] = 1.;
 
- 	work[2] = 0.;
 
- 	work[3] = 0.;
 
- 	work[4] = 0.;
 
- 	work[5] = 0.;
 
- 	work[6] = 0.;
 
- 	return 0;
 
-     }
 
- /* #:) Quick return for one-column matrix */
 
-     if (*n == 1) {
 
- 	if (lsvec) {
 
- 	    _starpu_dlascl_("G", &c__0, &c__0, &sva[1], &scale, m, &c__1, &a[a_dim1 + 
 
- 		    1], lda, &ierr);
 
- 	}
 
- 	work[1] = 1. / scale;
 
- 	if (sva[1] >= sfmin) {
 
- 	    work[2] = 1.;
 
- 	} else {
 
- 	    work[2] = 0.;
 
- 	}
 
- 	work[3] = 0.;
 
- 	work[4] = 0.;
 
- 	work[5] = 0.;
 
- 	work[6] = 0.;
 
- 	return 0;
 
-     }
 
- /*     Protect small singular values from underflow, and try to */
 
- /*     avoid underflows/overflows in computing Jacobi rotations. */
 
-     sn = sqrt(sfmin / epsilon);
 
-     temp1 = sqrt(big / (doublereal) (*n));
 
-     if (aapp <= sn || aaqq >= temp1 || sn <= aaqq && aapp <= temp1) {
 
- /* Computing MIN */
 
- 	d__1 = big, d__2 = temp1 / aapp;
 
- 	temp1 = min(d__1,d__2);
 
- /*         AAQQ  = AAQQ*TEMP1 */
 
- /*         AAPP  = AAPP*TEMP1 */
 
-     } else if (aaqq <= sn && aapp <= temp1) {
 
- /* Computing MIN */
 
- 	d__1 = sn / aaqq, d__2 = big / (aapp * sqrt((doublereal) (*n)));
 
- 	temp1 = min(d__1,d__2);
 
- /*         AAQQ  = AAQQ*TEMP1 */
 
- /*         AAPP  = AAPP*TEMP1 */
 
-     } else if (aaqq >= sn && aapp >= temp1) {
 
- /* Computing MAX */
 
- 	d__1 = sn / aaqq, d__2 = temp1 / aapp;
 
- 	temp1 = max(d__1,d__2);
 
- /*         AAQQ  = AAQQ*TEMP1 */
 
- /*         AAPP  = AAPP*TEMP1 */
 
-     } else if (aaqq <= sn && aapp >= temp1) {
 
- /* Computing MIN */
 
- 	d__1 = sn / aaqq, d__2 = big / (sqrt((doublereal) (*n)) * aapp);
 
- 	temp1 = min(d__1,d__2);
 
- /*         AAQQ  = AAQQ*TEMP1 */
 
- /*         AAPP  = AAPP*TEMP1 */
 
-     } else {
 
- 	temp1 = 1.;
 
-     }
 
- /*     Scale, if necessary */
 
-     if (temp1 != 1.) {
 
- 	_starpu_dlascl_("G", &c__0, &c__0, &c_b18, &temp1, n, &c__1, &sva[1], n, &
 
- 		ierr);
 
-     }
 
-     scale = temp1 * scale;
 
-     if (scale != 1.) {
 
- 	_starpu_dlascl_(joba, &c__0, &c__0, &c_b18, &scale, m, n, &a[a_offset], lda, &
 
- 		ierr);
 
- 	scale = 1. / scale;
 
-     }
 
- /*     Row-cyclic Jacobi SVD algorithm with column pivoting */
 
-     emptsw = *n * (*n - 1) / 2;
 
-     notrot = 0;
 
-     fastr[0] = 0.;
 
- /*     A is represented in factored form A = A * diag(WORK), where diag(WORK) */
 
- /*     is initialized to identity. WORK is updated during fast scaled */
 
- /*     rotations. */
 
-     i__1 = *n;
 
-     for (q = 1; q <= i__1; ++q) {
 
- 	work[q] = 1.;
 
- /* L1868: */
 
-     }
 
-     swband = 3;
 
- /* [TP] SWBAND is a tuning parameter [TP]. It is meaningful and effective */
 
- /*     if DGESVJ is used as a computational routine in the preconditioned */
 
- /*     Jacobi SVD algorithm DGESVJ. For sweeps i=1:SWBAND the procedure */
 
- /*     works on pivots inside a band-like region around the diagonal. */
 
- /*     The boundaries are determined dynamically, based on the number of */
 
- /*     pivots above a threshold. */
 
-     kbl = min(8,*n);
 
- /* [TP] KBL is a tuning parameter that defines the tile size in the */
 
- /*     tiling of the p-q loops of pivot pairs. In general, an optimal */
 
- /*     value of KBL depends on the matrix dimensions and on the */
 
- /*     parameters of the computer's memory. */
 
-     nbl = *n / kbl;
 
-     if (nbl * kbl != *n) {
 
- 	++nbl;
 
-     }
 
- /* Computing 2nd power */
 
-     i__1 = kbl;
 
-     blskip = i__1 * i__1;
 
- /* [TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. */
 
-     rowskip = min(5,kbl);
 
- /* [TP] ROWSKIP is a tuning parameter. */
 
-     lkahead = 1;
 
- /* [TP] LKAHEAD is a tuning parameter. */
 
- /*     Quasi block transformations, using the lower (upper) triangular */
 
- /*     structure of the input matrix. The quasi-block-cycling usually */
 
- /*     invokes cubic convergence. Big part of this cycle is done inside */
 
- /*     canonical subspaces of dimensions less than M. */
 
- /* Computing MAX */
 
-     i__1 = 64, i__2 = kbl << 2;
 
-     if ((lower || upper) && *n > max(i__1,i__2)) {
 
- /* [TP] The number of partition levels and the actual partition are */
 
- /*     tuning parameters. */
 
- 	n4 = *n / 4;
 
- 	n2 = *n / 2;
 
- 	n34 = n4 * 3;
 
- 	if (applv) {
 
- 	    q = 0;
 
- 	} else {
 
- 	    q = 1;
 
- 	}
 
- 	if (lower) {
 
- /*     This works very well on lower triangular matrices, in particular */
 
- /*     in the framework of the preconditioned Jacobi SVD (xGEJSV). */
 
- /*     The idea is simple: */
 
- /*     [+ 0 0 0]   Note that Jacobi transformations of [0 0] */
 
- /*     [+ + 0 0]                                       [0 0] */
 
- /*     [+ + x 0]   actually work on [x 0]              [x 0] */
 
- /*     [+ + x x]                    [x x].             [x x] */
 
- 	    i__1 = *m - n34;
 
- 	    i__2 = *n - n34;
 
- 	    i__3 = *lwork - *n;
 
- 	    _starpu_dgsvj0_(jobv, &i__1, &i__2, &a[n34 + 1 + (n34 + 1) * a_dim1], lda, 
 
- 		     &work[n34 + 1], &sva[n34 + 1], &mvl, &v[n34 * q + 1 + (
 
- 		    n34 + 1) * v_dim1], ldv, &epsilon, &sfmin, &tol, &c__2, &
 
- 		    work[*n + 1], &i__3, &ierr);
 
- 	    i__1 = *m - n2;
 
- 	    i__2 = n34 - n2;
 
- 	    i__3 = *lwork - *n;
 
- 	    _starpu_dgsvj0_(jobv, &i__1, &i__2, &a[n2 + 1 + (n2 + 1) * a_dim1], lda, &
 
- 		    work[n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + (n2 + 1)
 
- 		     * v_dim1], ldv, &epsilon, &sfmin, &tol, &c__2, &work[*n 
 
- 		    + 1], &i__3, &ierr);
 
- 	    i__1 = *m - n2;
 
- 	    i__2 = *n - n2;
 
- 	    i__3 = *lwork - *n;
 
- 	    _starpu_dgsvj1_(jobv, &i__1, &i__2, &n4, &a[n2 + 1 + (n2 + 1) * a_dim1], 
 
- 		    lda, &work[n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + (
 
- 		    n2 + 1) * v_dim1], ldv, &epsilon, &sfmin, &tol, &c__1, &
 
- 		    work[*n + 1], &i__3, &ierr);
 
- 	    i__1 = *m - n4;
 
- 	    i__2 = n2 - n4;
 
- 	    i__3 = *lwork - *n;
 
- 	    _starpu_dgsvj0_(jobv, &i__1, &i__2, &a[n4 + 1 + (n4 + 1) * a_dim1], lda, &
 
- 		    work[n4 + 1], &sva[n4 + 1], &mvl, &v[n4 * q + 1 + (n4 + 1)
 
- 		     * v_dim1], ldv, &epsilon, &sfmin, &tol, &c__1, &work[*n 
 
- 		    + 1], &i__3, &ierr);
 
- 	    i__1 = *lwork - *n;
 
- 	    _starpu_dgsvj0_(jobv, m, &n4, &a[a_offset], lda, &work[1], &sva[1], &mvl, 
 
- 		    &v[v_offset], ldv, &epsilon, &sfmin, &tol, &c__1, &work[*
 
- 		    n + 1], &i__1, &ierr);
 
- 	    i__1 = *lwork - *n;
 
- 	    _starpu_dgsvj1_(jobv, m, &n2, &n4, &a[a_offset], lda, &work[1], &sva[1], &
 
- 		    mvl, &v[v_offset], ldv, &epsilon, &sfmin, &tol, &c__1, &
 
- 		    work[*n + 1], &i__1, &ierr);
 
- 	} else if (upper) {
 
- 	    i__1 = *lwork - *n;
 
- 	    _starpu_dgsvj0_(jobv, &n4, &n4, &a[a_offset], lda, &work[1], &sva[1], &
 
- 		    mvl, &v[v_offset], ldv, &epsilon, &sfmin, &tol, &c__2, &
 
- 		    work[*n + 1], &i__1, &ierr);
 
- 	    i__1 = *lwork - *n;
 
- 	    _starpu_dgsvj0_(jobv, &n2, &n4, &a[(n4 + 1) * a_dim1 + 1], lda, &work[n4 
 
- 		    + 1], &sva[n4 + 1], &mvl, &v[n4 * q + 1 + (n4 + 1) * 
 
- 		    v_dim1], ldv, &epsilon, &sfmin, &tol, &c__1, &work[*n + 1]
 
- , &i__1, &ierr);
 
- 	    i__1 = *lwork - *n;
 
- 	    _starpu_dgsvj1_(jobv, &n2, &n2, &n4, &a[a_offset], lda, &work[1], &sva[1], 
 
- 		     &mvl, &v[v_offset], ldv, &epsilon, &sfmin, &tol, &c__1, &
 
- 		    work[*n + 1], &i__1, &ierr);
 
- 	    i__1 = n2 + n4;
 
- 	    i__2 = *lwork - *n;
 
- 	    _starpu_dgsvj0_(jobv, &i__1, &n4, &a[(n2 + 1) * a_dim1 + 1], lda, &work[
 
- 		    n2 + 1], &sva[n2 + 1], &mvl, &v[n2 * q + 1 + (n2 + 1) * 
 
- 		    v_dim1], ldv, &epsilon, &sfmin, &tol, &c__1, &work[*n + 1]
 
- , &i__2, &ierr);
 
- 	}
 
-     }
 
- /*     -#- Row-cyclic pivot strategy with de Rijk's pivoting -#- */
 
-     for (i__ = 1; i__ <= 30; ++i__) {
 
- /*     .. go go go ... */
 
- 	mxaapq = 0.;
 
- 	mxsinj = 0.;
 
- 	iswrot = 0;
 
- 	notrot = 0;
 
- 	pskipped = 0;
 
- /*     Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs */
 
- /*     1 <= p < q <= N. This is the first step toward a blocked implementation */
 
- /*     of the rotations. New implementation, based on block transformations, */
 
- /*     is under development. */
 
- 	i__1 = nbl;
 
- 	for (ibr = 1; ibr <= i__1; ++ibr) {
 
- 	    igl = (ibr - 1) * kbl + 1;
 
- /* Computing MIN */
 
- 	    i__3 = lkahead, i__4 = nbl - ibr;
 
- 	    i__2 = min(i__3,i__4);
 
- 	    for (ir1 = 0; ir1 <= i__2; ++ir1) {
 
- 		igl += ir1 * kbl;
 
- /* Computing MIN */
 
- 		i__4 = igl + kbl - 1, i__5 = *n - 1;
 
- 		i__3 = min(i__4,i__5);
 
- 		for (p = igl; p <= i__3; ++p) {
 
- /*     .. de Rijk's pivoting */
 
- 		    i__4 = *n - p + 1;
 
- 		    q = _starpu_idamax_(&i__4, &sva[p], &c__1) + p - 1;
 
- 		    if (p != q) {
 
- 			_starpu_dswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 
 
- 				1], &c__1);
 
- 			if (rsvec) {
 
- 			    _starpu_dswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * 
 
- 				    v_dim1 + 1], &c__1);
 
- 			}
 
- 			temp1 = sva[p];
 
- 			sva[p] = sva[q];
 
- 			sva[q] = temp1;
 
- 			temp1 = work[p];
 
- 			work[p] = work[q];
 
- 			work[q] = temp1;
 
- 		    }
 
- 		    if (ir1 == 0) {
 
- /*        Column norms are periodically updated by explicit */
 
- /*        norm computation. */
 
- /*        Caveat: */
 
- /*        Unfortunately, some BLAS implementations compute DNRM2(M,A(1,p),1) */
 
- /*        as DSQRT(DDOT(M,A(1,p),1,A(1,p),1)), which may cause the result to */
 
- /*        overflow for ||A(:,p)||_2 > DSQRT(overflow_threshold), and to */
 
- /*        underflow for ||A(:,p)||_2 < DSQRT(underflow_threshold). */
 
- /*        Hence, DNRM2 cannot be trusted, not even in the case when */
 
- /*        the true norm is far from the under(over)flow boundaries. */
 
- /*        If properly implemented DNRM2 is available, the IF-THEN-ELSE */
 
- /*        below should read "AAPP = DNRM2( M, A(1,p), 1 ) * WORK(p)". */
 
- 			if (sva[p] < rootbig && sva[p] > rootsfmin) {
 
- 			    sva[p] = _starpu_dnrm2_(m, &a[p * a_dim1 + 1], &c__1) * 
 
- 				    work[p];
 
- 			} else {
 
- 			    temp1 = 0.;
 
- 			    aapp = 0.;
 
- 			    _starpu_dlassq_(m, &a[p * a_dim1 + 1], &c__1, &temp1, &
 
- 				    aapp);
 
- 			    sva[p] = temp1 * sqrt(aapp) * work[p];
 
- 			}
 
- 			aapp = sva[p];
 
- 		    } else {
 
- 			aapp = sva[p];
 
- 		    }
 
- 		    if (aapp > 0.) {
 
- 			pskipped = 0;
 
- /* Computing MIN */
 
- 			i__5 = igl + kbl - 1;
 
- 			i__4 = min(i__5,*n);
 
- 			for (q = p + 1; q <= i__4; ++q) {
 
- 			    aaqq = sva[q];
 
- 			    if (aaqq > 0.) {
 
- 				aapp0 = aapp;
 
- 				if (aaqq >= 1.) {
 
- 				    rotok = small * aapp <= aaqq;
 
- 				    if (aapp < big / aaqq) {
 
- 					aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], &
 
- 						c__1, &a[q * a_dim1 + 1], &
 
- 						c__1) * work[p] * work[q] / 
 
- 						aaqq / aapp;
 
- 				    } else {
 
- 					_starpu_dcopy_(m, &a[p * a_dim1 + 1], &c__1, &
 
- 						work[*n + 1], &c__1);
 
- 					_starpu_dlascl_("G", &c__0, &c__0, &aapp, &
 
- 						work[p], m, &c__1, &work[*n + 
 
- 						1], lda, &ierr);
 
- 					aapq = _starpu_ddot_(m, &work[*n + 1], &c__1, 
 
- 						&a[q * a_dim1 + 1], &c__1) * 
 
- 						work[q] / aaqq;
 
- 				    }
 
- 				} else {
 
- 				    rotok = aapp <= aaqq / small;
 
- 				    if (aapp > small / aaqq) {
 
- 					aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], &
 
- 						c__1, &a[q * a_dim1 + 1], &
 
- 						c__1) * work[p] * work[q] / 
 
- 						aaqq / aapp;
 
- 				    } else {
 
- 					_starpu_dcopy_(m, &a[q * a_dim1 + 1], &c__1, &
 
- 						work[*n + 1], &c__1);
 
- 					_starpu_dlascl_("G", &c__0, &c__0, &aaqq, &
 
- 						work[q], m, &c__1, &work[*n + 
 
- 						1], lda, &ierr);
 
- 					aapq = _starpu_ddot_(m, &work[*n + 1], &c__1, 
 
- 						&a[p * a_dim1 + 1], &c__1) * 
 
- 						work[p] / aapp;
 
- 				    }
 
- 				}
 
- /* Computing MAX */
 
- 				d__1 = mxaapq, d__2 = abs(aapq);
 
- 				mxaapq = max(d__1,d__2);
 
- /*        TO rotate or NOT to rotate, THAT is the question ... */
 
- 				if (abs(aapq) > tol) {
 
- /*           .. rotate */
 
- /* [RTD]      ROTATED = ROTATED + ONE */
 
- 				    if (ir1 == 0) {
 
- 					notrot = 0;
 
- 					pskipped = 0;
 
- 					++iswrot;
 
- 				    }
 
- 				    if (rotok) {
 
- 					aqoap = aaqq / aapp;
 
- 					apoaq = aapp / aaqq;
 
- 					theta = (d__1 = aqoap - apoaq, abs(
 
- 						d__1)) * -.5 / aapq;
 
- 					if (abs(theta) > bigtheta) {
 
- 					    t = .5 / theta;
 
- 					    fastr[2] = t * work[p] / work[q];
 
- 					    fastr[3] = -t * work[q] / work[p];
 
- 					    _starpu_drotm_(m, &a[p * a_dim1 + 1], &
 
- 						    c__1, &a[q * a_dim1 + 1], 
 
- 						    &c__1, fastr);
 
- 					    if (rsvec) {
 
- 			  _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * 
 
- 				  v_dim1 + 1], &c__1, fastr);
 
- 					    }
 
- /* Computing MAX */
 
- 					    d__1 = 0., d__2 = t * apoaq * 
 
- 						    aapq + 1.;
 
- 					    sva[q] = aaqq * sqrt((max(d__1,
 
- 						    d__2)));
 
- 					    aapp *= sqrt(1. - t * aqoap * 
 
- 						    aapq);
 
- /* Computing MAX */
 
- 					    d__1 = mxsinj, d__2 = abs(t);
 
- 					    mxsinj = max(d__1,d__2);
 
- 					} else {
 
- /*                 .. choose correct signum for THETA and rotate */
 
- 					    thsign = -d_sign(&c_b18, &aapq);
 
- 					    t = 1. / (theta + thsign * sqrt(
 
- 						    theta * theta + 1.));
 
- 					    cs = sqrt(1. / (t * t + 1.));
 
- 					    sn = t * cs;
 
- /* Computing MAX */
 
- 					    d__1 = mxsinj, d__2 = abs(sn);
 
- 					    mxsinj = max(d__1,d__2);
 
- /* Computing MAX */
 
- 					    d__1 = 0., d__2 = t * apoaq * 
 
- 						    aapq + 1.;
 
- 					    sva[q] = aaqq * sqrt((max(d__1,
 
- 						    d__2)));
 
- /* Computing MAX */
 
- 					    d__1 = 0., d__2 = 1. - t * aqoap *
 
- 						     aapq;
 
- 					    aapp *= sqrt((max(d__1,d__2)));
 
- 					    apoaq = work[p] / work[q];
 
- 					    aqoap = work[q] / work[p];
 
- 					    if (work[p] >= 1.) {
 
- 			  if (work[q] >= 1.) {
 
- 			      fastr[2] = t * apoaq;
 
- 			      fastr[3] = -t * aqoap;
 
- 			      work[p] *= cs;
 
- 			      work[q] *= cs;
 
- 			      _starpu_drotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * 
 
- 				      a_dim1 + 1], &c__1, fastr);
 
- 			      if (rsvec) {
 
- 				  _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[
 
- 					  q * v_dim1 + 1], &c__1, fastr);
 
- 			      }
 
- 			  } else {
 
- 			      d__1 = -t * aqoap;
 
- 			      _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[
 
- 				      p * a_dim1 + 1], &c__1);
 
- 			      d__1 = cs * sn * apoaq;
 
- 			      _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[
 
- 				      q * a_dim1 + 1], &c__1);
 
- 			      work[p] *= cs;
 
- 			      work[q] /= cs;
 
- 			      if (rsvec) {
 
- 				  d__1 = -t * aqoap;
 
- 				  _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], &
 
- 					  c__1, &v[p * v_dim1 + 1], &c__1);
 
- 				  d__1 = cs * sn * apoaq;
 
- 				  _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], &
 
- 					  c__1, &v[q * v_dim1 + 1], &c__1);
 
- 			      }
 
- 			  }
 
- 					    } else {
 
- 			  if (work[q] >= 1.) {
 
- 			      d__1 = t * apoaq;
 
- 			      _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[
 
- 				      q * a_dim1 + 1], &c__1);
 
- 			      d__1 = -cs * sn * aqoap;
 
- 			      _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[
 
- 				      p * a_dim1 + 1], &c__1);
 
- 			      work[p] /= cs;
 
- 			      work[q] *= cs;
 
- 			      if (rsvec) {
 
- 				  d__1 = t * apoaq;
 
- 				  _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], &
 
- 					  c__1, &v[q * v_dim1 + 1], &c__1);
 
- 				  d__1 = -cs * sn * aqoap;
 
- 				  _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], &
 
- 					  c__1, &v[p * v_dim1 + 1], &c__1);
 
- 			      }
 
- 			  } else {
 
- 			      if (work[p] >= work[q]) {
 
- 				  d__1 = -t * aqoap;
 
- 				  _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, 
 
- 					  &a[p * a_dim1 + 1], &c__1);
 
- 				  d__1 = cs * sn * apoaq;
 
- 				  _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, 
 
- 					  &a[q * a_dim1 + 1], &c__1);
 
- 				  work[p] *= cs;
 
- 				  work[q] /= cs;
 
- 				  if (rsvec) {
 
- 				      d__1 = -t * aqoap;
 
- 				      _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], 
 
- 					      &c__1, &v[p * v_dim1 + 1], &
 
- 					      c__1);
 
- 				      d__1 = cs * sn * apoaq;
 
- 				      _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], 
 
- 					      &c__1, &v[q * v_dim1 + 1], &
 
- 					      c__1);
 
- 				  }
 
- 			      } else {
 
- 				  d__1 = t * apoaq;
 
- 				  _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, 
 
- 					  &a[q * a_dim1 + 1], &c__1);
 
- 				  d__1 = -cs * sn * aqoap;
 
- 				  _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, 
 
- 					  &a[p * a_dim1 + 1], &c__1);
 
- 				  work[p] /= cs;
 
- 				  work[q] *= cs;
 
- 				  if (rsvec) {
 
- 				      d__1 = t * apoaq;
 
- 				      _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], 
 
- 					      &c__1, &v[q * v_dim1 + 1], &
 
- 					      c__1);
 
- 				      d__1 = -cs * sn * aqoap;
 
- 				      _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], 
 
- 					      &c__1, &v[p * v_dim1 + 1], &
 
- 					      c__1);
 
- 				  }
 
- 			      }
 
- 			  }
 
- 					    }
 
- 					}
 
- 				    } else {
 
- /*              .. have to use modified Gram-Schmidt like transformation */
 
- 					_starpu_dcopy_(m, &a[p * a_dim1 + 1], &c__1, &
 
- 						work[*n + 1], &c__1);
 
- 					_starpu_dlascl_("G", &c__0, &c__0, &aapp, &
 
- 						c_b18, m, &c__1, &work[*n + 1]
 
- , lda, &ierr);
 
- 					_starpu_dlascl_("G", &c__0, &c__0, &aaqq, &
 
- 						c_b18, m, &c__1, &a[q * 
 
- 						a_dim1 + 1], lda, &ierr);
 
- 					temp1 = -aapq * work[p] / work[q];
 
- 					_starpu_daxpy_(m, &temp1, &work[*n + 1], &
 
- 						c__1, &a[q * a_dim1 + 1], &
 
- 						c__1);
 
- 					_starpu_dlascl_("G", &c__0, &c__0, &c_b18, &
 
- 						aaqq, m, &c__1, &a[q * a_dim1 
 
- 						+ 1], lda, &ierr);
 
- /* Computing MAX */
 
- 					d__1 = 0., d__2 = 1. - aapq * aapq;
 
- 					sva[q] = aaqq * sqrt((max(d__1,d__2)))
 
- 						;
 
- 					mxsinj = max(mxsinj,sfmin);
 
- 				    }
 
- /*           END IF ROTOK THEN ... ELSE */
 
- /*           In the case of cancellation in updating SVA(q), SVA(p) */
 
- /*           recompute SVA(q), SVA(p). */
 
- /* Computing 2nd power */
 
- 				    d__1 = sva[q] / aaqq;
 
- 				    if (d__1 * d__1 <= rooteps) {
 
- 					if (aaqq < rootbig && aaqq > 
 
- 						rootsfmin) {
 
- 					    sva[q] = _starpu_dnrm2_(m, &a[q * a_dim1 
 
- 						    + 1], &c__1) * work[q];
 
- 					} else {
 
- 					    t = 0.;
 
- 					    aaqq = 0.;
 
- 					    _starpu_dlassq_(m, &a[q * a_dim1 + 1], &
 
- 						    c__1, &t, &aaqq);
 
- 					    sva[q] = t * sqrt(aaqq) * work[q];
 
- 					}
 
- 				    }
 
- 				    if (aapp / aapp0 <= rooteps) {
 
- 					if (aapp < rootbig && aapp > 
 
- 						rootsfmin) {
 
- 					    aapp = _starpu_dnrm2_(m, &a[p * a_dim1 + 
 
- 						    1], &c__1) * work[p];
 
- 					} else {
 
- 					    t = 0.;
 
- 					    aapp = 0.;
 
- 					    _starpu_dlassq_(m, &a[p * a_dim1 + 1], &
 
- 						    c__1, &t, &aapp);
 
- 					    aapp = t * sqrt(aapp) * work[p];
 
- 					}
 
- 					sva[p] = aapp;
 
- 				    }
 
- 				} else {
 
- /*        A(:,p) and A(:,q) already numerically orthogonal */
 
- 				    if (ir1 == 0) {
 
- 					++notrot;
 
- 				    }
 
- /* [RTD]      SKIPPED  = SKIPPED  + 1 */
 
- 				    ++pskipped;
 
- 				}
 
- 			    } else {
 
- /*        A(:,q) is zero column */
 
- 				if (ir1 == 0) {
 
- 				    ++notrot;
 
- 				}
 
- 				++pskipped;
 
- 			    }
 
- 			    if (i__ <= swband && pskipped > rowskip) {
 
- 				if (ir1 == 0) {
 
- 				    aapp = -aapp;
 
- 				}
 
- 				notrot = 0;
 
- 				goto L2103;
 
- 			    }
 
- /* L2002: */
 
- 			}
 
- /*     END q-LOOP */
 
- L2103:
 
- /*     bailed out of q-loop */
 
- 			sva[p] = aapp;
 
- 		    } else {
 
- 			sva[p] = aapp;
 
- 			if (ir1 == 0 && aapp == 0.) {
 
- /* Computing MIN */
 
- 			    i__4 = igl + kbl - 1;
 
- 			    notrot = notrot + min(i__4,*n) - p;
 
- 			}
 
- 		    }
 
- /* L2001: */
 
- 		}
 
- /*     end of the p-loop */
 
- /*     end of doing the block ( ibr, ibr ) */
 
- /* L1002: */
 
- 	    }
 
- /*     end of ir1-loop */
 
- /* ... go to the off diagonal blocks */
 
- 	    igl = (ibr - 1) * kbl + 1;
 
- 	    i__2 = nbl;
 
- 	    for (jbc = ibr + 1; jbc <= i__2; ++jbc) {
 
- 		jgl = (jbc - 1) * kbl + 1;
 
- /*        doing the block at ( ibr, jbc ) */
 
- 		ijblsk = 0;
 
- /* Computing MIN */
 
- 		i__4 = igl + kbl - 1;
 
- 		i__3 = min(i__4,*n);
 
- 		for (p = igl; p <= i__3; ++p) {
 
- 		    aapp = sva[p];
 
- 		    if (aapp > 0.) {
 
- 			pskipped = 0;
 
- /* Computing MIN */
 
- 			i__5 = jgl + kbl - 1;
 
- 			i__4 = min(i__5,*n);
 
- 			for (q = jgl; q <= i__4; ++q) {
 
- 			    aaqq = sva[q];
 
- 			    if (aaqq > 0.) {
 
- 				aapp0 = aapp;
 
- /*     -#- M x 2 Jacobi SVD -#- */
 
- /*        Safe Gram matrix computation */
 
- 				if (aaqq >= 1.) {
 
- 				    if (aapp >= aaqq) {
 
- 					rotok = small * aapp <= aaqq;
 
- 				    } else {
 
- 					rotok = small * aaqq <= aapp;
 
- 				    }
 
- 				    if (aapp < big / aaqq) {
 
- 					aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], &
 
- 						c__1, &a[q * a_dim1 + 1], &
 
- 						c__1) * work[p] * work[q] / 
 
- 						aaqq / aapp;
 
- 				    } else {
 
- 					_starpu_dcopy_(m, &a[p * a_dim1 + 1], &c__1, &
 
- 						work[*n + 1], &c__1);
 
- 					_starpu_dlascl_("G", &c__0, &c__0, &aapp, &
 
- 						work[p], m, &c__1, &work[*n + 
 
- 						1], lda, &ierr);
 
- 					aapq = _starpu_ddot_(m, &work[*n + 1], &c__1, 
 
- 						&a[q * a_dim1 + 1], &c__1) * 
 
- 						work[q] / aaqq;
 
- 				    }
 
- 				} else {
 
- 				    if (aapp >= aaqq) {
 
- 					rotok = aapp <= aaqq / small;
 
- 				    } else {
 
- 					rotok = aaqq <= aapp / small;
 
- 				    }
 
- 				    if (aapp > small / aaqq) {
 
- 					aapq = _starpu_ddot_(m, &a[p * a_dim1 + 1], &
 
- 						c__1, &a[q * a_dim1 + 1], &
 
- 						c__1) * work[p] * work[q] / 
 
- 						aaqq / aapp;
 
- 				    } else {
 
- 					_starpu_dcopy_(m, &a[q * a_dim1 + 1], &c__1, &
 
- 						work[*n + 1], &c__1);
 
- 					_starpu_dlascl_("G", &c__0, &c__0, &aaqq, &
 
- 						work[q], m, &c__1, &work[*n + 
 
- 						1], lda, &ierr);
 
- 					aapq = _starpu_ddot_(m, &work[*n + 1], &c__1, 
 
- 						&a[p * a_dim1 + 1], &c__1) * 
 
- 						work[p] / aapp;
 
- 				    }
 
- 				}
 
- /* Computing MAX */
 
- 				d__1 = mxaapq, d__2 = abs(aapq);
 
- 				mxaapq = max(d__1,d__2);
 
- /*        TO rotate or NOT to rotate, THAT is the question ... */
 
- 				if (abs(aapq) > tol) {
 
- 				    notrot = 0;
 
- /* [RTD]      ROTATED  = ROTATED + 1 */
 
- 				    pskipped = 0;
 
- 				    ++iswrot;
 
- 				    if (rotok) {
 
- 					aqoap = aaqq / aapp;
 
- 					apoaq = aapp / aaqq;
 
- 					theta = (d__1 = aqoap - apoaq, abs(
 
- 						d__1)) * -.5 / aapq;
 
- 					if (aaqq > aapp0) {
 
- 					    theta = -theta;
 
- 					}
 
- 					if (abs(theta) > bigtheta) {
 
- 					    t = .5 / theta;
 
- 					    fastr[2] = t * work[p] / work[q];
 
- 					    fastr[3] = -t * work[q] / work[p];
 
- 					    _starpu_drotm_(m, &a[p * a_dim1 + 1], &
 
- 						    c__1, &a[q * a_dim1 + 1], 
 
- 						    &c__1, fastr);
 
- 					    if (rsvec) {
 
- 			  _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * 
 
- 				  v_dim1 + 1], &c__1, fastr);
 
- 					    }
 
- /* Computing MAX */
 
- 					    d__1 = 0., d__2 = t * apoaq * 
 
- 						    aapq + 1.;
 
- 					    sva[q] = aaqq * sqrt((max(d__1,
 
- 						    d__2)));
 
- /* Computing MAX */
 
- 					    d__1 = 0., d__2 = 1. - t * aqoap *
 
- 						     aapq;
 
- 					    aapp *= sqrt((max(d__1,d__2)));
 
- /* Computing MAX */
 
- 					    d__1 = mxsinj, d__2 = abs(t);
 
- 					    mxsinj = max(d__1,d__2);
 
- 					} else {
 
- /*                 .. choose correct signum for THETA and rotate */
 
- 					    thsign = -d_sign(&c_b18, &aapq);
 
- 					    if (aaqq > aapp0) {
 
- 			  thsign = -thsign;
 
- 					    }
 
- 					    t = 1. / (theta + thsign * sqrt(
 
- 						    theta * theta + 1.));
 
- 					    cs = sqrt(1. / (t * t + 1.));
 
- 					    sn = t * cs;
 
- /* Computing MAX */
 
- 					    d__1 = mxsinj, d__2 = abs(sn);
 
- 					    mxsinj = max(d__1,d__2);
 
- /* Computing MAX */
 
- 					    d__1 = 0., d__2 = t * apoaq * 
 
- 						    aapq + 1.;
 
- 					    sva[q] = aaqq * sqrt((max(d__1,
 
- 						    d__2)));
 
- 					    aapp *= sqrt(1. - t * aqoap * 
 
- 						    aapq);
 
- 					    apoaq = work[p] / work[q];
 
- 					    aqoap = work[q] / work[p];
 
- 					    if (work[p] >= 1.) {
 
- 			  if (work[q] >= 1.) {
 
- 			      fastr[2] = t * apoaq;
 
- 			      fastr[3] = -t * aqoap;
 
- 			      work[p] *= cs;
 
- 			      work[q] *= cs;
 
- 			      _starpu_drotm_(m, &a[p * a_dim1 + 1], &c__1, &a[q * 
 
- 				      a_dim1 + 1], &c__1, fastr);
 
- 			      if (rsvec) {
 
- 				  _starpu_drotm_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[
 
- 					  q * v_dim1 + 1], &c__1, fastr);
 
- 			      }
 
- 			  } else {
 
- 			      d__1 = -t * aqoap;
 
- 			      _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[
 
- 				      p * a_dim1 + 1], &c__1);
 
- 			      d__1 = cs * sn * apoaq;
 
- 			      _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[
 
- 				      q * a_dim1 + 1], &c__1);
 
- 			      if (rsvec) {
 
- 				  d__1 = -t * aqoap;
 
- 				  _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], &
 
- 					  c__1, &v[p * v_dim1 + 1], &c__1);
 
- 				  d__1 = cs * sn * apoaq;
 
- 				  _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], &
 
- 					  c__1, &v[q * v_dim1 + 1], &c__1);
 
- 			      }
 
- 			      work[p] *= cs;
 
- 			      work[q] /= cs;
 
- 			  }
 
- 					    } else {
 
- 			  if (work[q] >= 1.) {
 
- 			      d__1 = t * apoaq;
 
- 			      _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, &a[
 
- 				      q * a_dim1 + 1], &c__1);
 
- 			      d__1 = -cs * sn * aqoap;
 
- 			      _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, &a[
 
- 				      p * a_dim1 + 1], &c__1);
 
- 			      if (rsvec) {
 
- 				  d__1 = t * apoaq;
 
- 				  _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], &
 
- 					  c__1, &v[q * v_dim1 + 1], &c__1);
 
- 				  d__1 = -cs * sn * aqoap;
 
- 				  _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], &
 
- 					  c__1, &v[p * v_dim1 + 1], &c__1);
 
- 			      }
 
- 			      work[p] /= cs;
 
- 			      work[q] *= cs;
 
- 			  } else {
 
- 			      if (work[p] >= work[q]) {
 
- 				  d__1 = -t * aqoap;
 
- 				  _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, 
 
- 					  &a[p * a_dim1 + 1], &c__1);
 
- 				  d__1 = cs * sn * apoaq;
 
- 				  _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, 
 
- 					  &a[q * a_dim1 + 1], &c__1);
 
- 				  work[p] *= cs;
 
- 				  work[q] /= cs;
 
- 				  if (rsvec) {
 
- 				      d__1 = -t * aqoap;
 
- 				      _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], 
 
- 					      &c__1, &v[p * v_dim1 + 1], &
 
- 					      c__1);
 
- 				      d__1 = cs * sn * apoaq;
 
- 				      _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], 
 
- 					      &c__1, &v[q * v_dim1 + 1], &
 
- 					      c__1);
 
- 				  }
 
- 			      } else {
 
- 				  d__1 = t * apoaq;
 
- 				  _starpu_daxpy_(m, &d__1, &a[p * a_dim1 + 1], &c__1, 
 
- 					  &a[q * a_dim1 + 1], &c__1);
 
- 				  d__1 = -cs * sn * aqoap;
 
- 				  _starpu_daxpy_(m, &d__1, &a[q * a_dim1 + 1], &c__1, 
 
- 					  &a[p * a_dim1 + 1], &c__1);
 
- 				  work[p] /= cs;
 
- 				  work[q] *= cs;
 
- 				  if (rsvec) {
 
- 				      d__1 = t * apoaq;
 
- 				      _starpu_daxpy_(&mvl, &d__1, &v[p * v_dim1 + 1], 
 
- 					      &c__1, &v[q * v_dim1 + 1], &
 
- 					      c__1);
 
- 				      d__1 = -cs * sn * aqoap;
 
- 				      _starpu_daxpy_(&mvl, &d__1, &v[q * v_dim1 + 1], 
 
- 					      &c__1, &v[p * v_dim1 + 1], &
 
- 					      c__1);
 
- 				  }
 
- 			      }
 
- 			  }
 
- 					    }
 
- 					}
 
- 				    } else {
 
- 					if (aapp > aaqq) {
 
- 					    _starpu_dcopy_(m, &a[p * a_dim1 + 1], &
 
- 						    c__1, &work[*n + 1], &
 
- 						    c__1);
 
- 					    _starpu_dlascl_("G", &c__0, &c__0, &aapp, 
 
- 						    &c_b18, m, &c__1, &work[*
 
- 						    n + 1], lda, &ierr);
 
- 					    _starpu_dlascl_("G", &c__0, &c__0, &aaqq, 
 
- 						    &c_b18, m, &c__1, &a[q * 
 
- 						    a_dim1 + 1], lda, &ierr);
 
- 					    temp1 = -aapq * work[p] / work[q];
 
- 					    _starpu_daxpy_(m, &temp1, &work[*n + 1], &
 
- 						    c__1, &a[q * a_dim1 + 1], 
 
- 						    &c__1);
 
- 					    _starpu_dlascl_("G", &c__0, &c__0, &c_b18, 
 
- 						     &aaqq, m, &c__1, &a[q * 
 
- 						    a_dim1 + 1], lda, &ierr);
 
- /* Computing MAX */
 
- 					    d__1 = 0., d__2 = 1. - aapq * 
 
- 						    aapq;
 
- 					    sva[q] = aaqq * sqrt((max(d__1,
 
- 						    d__2)));
 
- 					    mxsinj = max(mxsinj,sfmin);
 
- 					} else {
 
- 					    _starpu_dcopy_(m, &a[q * a_dim1 + 1], &
 
- 						    c__1, &work[*n + 1], &
 
- 						    c__1);
 
- 					    _starpu_dlascl_("G", &c__0, &c__0, &aaqq, 
 
- 						    &c_b18, m, &c__1, &work[*
 
- 						    n + 1], lda, &ierr);
 
- 					    _starpu_dlascl_("G", &c__0, &c__0, &aapp, 
 
- 						    &c_b18, m, &c__1, &a[p * 
 
- 						    a_dim1 + 1], lda, &ierr);
 
- 					    temp1 = -aapq * work[q] / work[p];
 
- 					    _starpu_daxpy_(m, &temp1, &work[*n + 1], &
 
- 						    c__1, &a[p * a_dim1 + 1], 
 
- 						    &c__1);
 
- 					    _starpu_dlascl_("G", &c__0, &c__0, &c_b18, 
 
- 						     &aapp, m, &c__1, &a[p * 
 
- 						    a_dim1 + 1], lda, &ierr);
 
- /* Computing MAX */
 
- 					    d__1 = 0., d__2 = 1. - aapq * 
 
- 						    aapq;
 
- 					    sva[p] = aapp * sqrt((max(d__1,
 
- 						    d__2)));
 
- 					    mxsinj = max(mxsinj,sfmin);
 
- 					}
 
- 				    }
 
- /*           END IF ROTOK THEN ... ELSE */
 
- /*           In the case of cancellation in updating SVA(q) */
 
- /*           .. recompute SVA(q) */
 
- /* Computing 2nd power */
 
- 				    d__1 = sva[q] / aaqq;
 
- 				    if (d__1 * d__1 <= rooteps) {
 
- 					if (aaqq < rootbig && aaqq > 
 
- 						rootsfmin) {
 
- 					    sva[q] = _starpu_dnrm2_(m, &a[q * a_dim1 
 
- 						    + 1], &c__1) * work[q];
 
- 					} else {
 
- 					    t = 0.;
 
- 					    aaqq = 0.;
 
- 					    _starpu_dlassq_(m, &a[q * a_dim1 + 1], &
 
- 						    c__1, &t, &aaqq);
 
- 					    sva[q] = t * sqrt(aaqq) * work[q];
 
- 					}
 
- 				    }
 
- /* Computing 2nd power */
 
- 				    d__1 = aapp / aapp0;
 
- 				    if (d__1 * d__1 <= rooteps) {
 
- 					if (aapp < rootbig && aapp > 
 
- 						rootsfmin) {
 
- 					    aapp = _starpu_dnrm2_(m, &a[p * a_dim1 + 
 
- 						    1], &c__1) * work[p];
 
- 					} else {
 
- 					    t = 0.;
 
- 					    aapp = 0.;
 
- 					    _starpu_dlassq_(m, &a[p * a_dim1 + 1], &
 
- 						    c__1, &t, &aapp);
 
- 					    aapp = t * sqrt(aapp) * work[p];
 
- 					}
 
- 					sva[p] = aapp;
 
- 				    }
 
- /*              end of OK rotation */
 
- 				} else {
 
- 				    ++notrot;
 
- /* [RTD]      SKIPPED  = SKIPPED  + 1 */
 
- 				    ++pskipped;
 
- 				    ++ijblsk;
 
- 				}
 
- 			    } else {
 
- 				++notrot;
 
- 				++pskipped;
 
- 				++ijblsk;
 
- 			    }
 
- 			    if (i__ <= swband && ijblsk >= blskip) {
 
- 				sva[p] = aapp;
 
- 				notrot = 0;
 
- 				goto L2011;
 
- 			    }
 
- 			    if (i__ <= swband && pskipped > rowskip) {
 
- 				aapp = -aapp;
 
- 				notrot = 0;
 
- 				goto L2203;
 
- 			    }
 
- /* L2200: */
 
- 			}
 
- /*        end of the q-loop */
 
- L2203:
 
- 			sva[p] = aapp;
 
- 		    } else {
 
- 			if (aapp == 0.) {
 
- /* Computing MIN */
 
- 			    i__4 = jgl + kbl - 1;
 
- 			    notrot = notrot + min(i__4,*n) - jgl + 1;
 
- 			}
 
- 			if (aapp < 0.) {
 
- 			    notrot = 0;
 
- 			}
 
- 		    }
 
- /* L2100: */
 
- 		}
 
- /*     end of the p-loop */
 
- /* L2010: */
 
- 	    }
 
- /*     end of the jbc-loop */
 
- L2011:
 
- /* 2011 bailed out of the jbc-loop */
 
- /* Computing MIN */
 
- 	    i__3 = igl + kbl - 1;
 
- 	    i__2 = min(i__3,*n);
 
- 	    for (p = igl; p <= i__2; ++p) {
 
- 		sva[p] = (d__1 = sva[p], abs(d__1));
 
- /* L2012: */
 
- 	    }
 
- /* ** */
 
- /* L2000: */
 
- 	}
 
- /* 2000 :: end of the ibr-loop */
 
- /*     .. update SVA(N) */
 
- 	if (sva[*n] < rootbig && sva[*n] > rootsfmin) {
 
- 	    sva[*n] = _starpu_dnrm2_(m, &a[*n * a_dim1 + 1], &c__1) * work[*n];
 
- 	} else {
 
- 	    t = 0.;
 
- 	    aapp = 0.;
 
- 	    _starpu_dlassq_(m, &a[*n * a_dim1 + 1], &c__1, &t, &aapp);
 
- 	    sva[*n] = t * sqrt(aapp) * work[*n];
 
- 	}
 
- /*     Additional steering devices */
 
- 	if (i__ < swband && (mxaapq <= roottol || iswrot <= *n)) {
 
- 	    swband = i__;
 
- 	}
 
- 	if (i__ > swband + 1 && mxaapq < sqrt((doublereal) (*n)) * tol && (
 
- 		doublereal) (*n) * mxaapq * mxsinj < tol) {
 
- 	    goto L1994;
 
- 	}
 
- 	if (notrot >= emptsw) {
 
- 	    goto L1994;
 
- 	}
 
- /* L1993: */
 
-     }
 
- /*     end i=1:NSWEEP loop */
 
- /* #:( Reaching this point means that the procedure has not converged. */
 
-     *info = 29;
 
-     goto L1995;
 
- L1994:
 
- /* #:) Reaching this point means numerical convergence after the i-th */
 
- /*     sweep. */
 
-     *info = 0;
 
- /* #:) INFO = 0 confirms successful iterations. */
 
- L1995:
 
- /*     Sort the singular values and find how many are above */
 
- /*     the underflow threshold. */
 
-     n2 = 0;
 
-     n4 = 0;
 
-     i__1 = *n - 1;
 
-     for (p = 1; p <= i__1; ++p) {
 
- 	i__2 = *n - p + 1;
 
- 	q = _starpu_idamax_(&i__2, &sva[p], &c__1) + p - 1;
 
- 	if (p != q) {
 
- 	    temp1 = sva[p];
 
- 	    sva[p] = sva[q];
 
- 	    sva[q] = temp1;
 
- 	    temp1 = work[p];
 
- 	    work[p] = work[q];
 
- 	    work[q] = temp1;
 
- 	    _starpu_dswap_(m, &a[p * a_dim1 + 1], &c__1, &a[q * a_dim1 + 1], &c__1);
 
- 	    if (rsvec) {
 
- 		_starpu_dswap_(&mvl, &v[p * v_dim1 + 1], &c__1, &v[q * v_dim1 + 1], &
 
- 			c__1);
 
- 	    }
 
- 	}
 
- 	if (sva[p] != 0.) {
 
- 	    ++n4;
 
- 	    if (sva[p] * scale > sfmin) {
 
- 		++n2;
 
- 	    }
 
- 	}
 
- /* L5991: */
 
-     }
 
-     if (sva[*n] != 0.) {
 
- 	++n4;
 
- 	if (sva[*n] * scale > sfmin) {
 
- 	    ++n2;
 
- 	}
 
-     }
 
- /*     Normalize the left singular vectors. */
 
-     if (lsvec || uctol) {
 
- 	i__1 = n2;
 
- 	for (p = 1; p <= i__1; ++p) {
 
- 	    d__1 = work[p] / sva[p];
 
- 	    _starpu_dscal_(m, &d__1, &a[p * a_dim1 + 1], &c__1);
 
- /* L1998: */
 
- 	}
 
-     }
 
- /*     Scale the product of Jacobi rotations (assemble the fast rotations). */
 
-     if (rsvec) {
 
- 	if (applv) {
 
- 	    i__1 = *n;
 
- 	    for (p = 1; p <= i__1; ++p) {
 
- 		_starpu_dscal_(&mvl, &work[p], &v[p * v_dim1 + 1], &c__1);
 
- /* L2398: */
 
- 	    }
 
- 	} else {
 
- 	    i__1 = *n;
 
- 	    for (p = 1; p <= i__1; ++p) {
 
- 		temp1 = 1. / _starpu_dnrm2_(&mvl, &v[p * v_dim1 + 1], &c__1);
 
- 		_starpu_dscal_(&mvl, &temp1, &v[p * v_dim1 + 1], &c__1);
 
- /* L2399: */
 
- 	    }
 
- 	}
 
-     }
 
- /*     Undo scaling, if necessary (and possible). */
 
-     if (scale > 1. && sva[1] < big / scale || scale < 1. && sva[n2] > sfmin / 
 
- 	    scale) {
 
- 	i__1 = *n;
 
- 	for (p = 1; p <= i__1; ++p) {
 
- 	    sva[p] = scale * sva[p];
 
- /* L2400: */
 
- 	}
 
- 	scale = 1.;
 
-     }
 
-     work[1] = scale;
 
- /*     The singular values of A are SCALE*SVA(1:N). If SCALE.NE.ONE */
 
- /*     then some of the singular values may overflow or underflow and */
 
- /*     the spectrum is given in this factored representation. */
 
-     work[2] = (doublereal) n4;
 
- /*     N4 is the number of computed nonzero singular values of A. */
 
-     work[3] = (doublereal) n2;
 
- /*     N2 is the number of singular values of A greater than SFMIN. */
 
- /*     If N2<N, SVA(N2:N) contains ZEROS and/or denormalized numbers */
 
- /*     that may carry some information. */
 
-     work[4] = (doublereal) i__;
 
- /*     i is the index of the last sweep before declaring convergence. */
 
-     work[5] = mxaapq;
 
- /*     MXAAPQ is the largest absolute value of scaled pivots in the */
 
- /*     last sweep */
 
-     work[6] = mxsinj;
 
- /*     MXSINJ is the largest absolute value of the sines of Jacobi angles */
 
- /*     in the last sweep */
 
-     return 0;
 
- /*     .. */
 
- /*     .. END OF DGESVJ */
 
- /*     .. */
 
- } /* _starpu_dgesvj_ */
 
 
  |