| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589 | /* dgbtrf.f -- translated by f2c (version 20061008).   You must link the resulting object file with libf2c:	on Microsoft Windows system, link with libf2c.lib;	on Linux or Unix systems, link with .../path/to/libf2c.a -lm	or, if you install libf2c.a in a standard place, with -lf2c -lm	-- in that order, at the end of the command line, as in		cc *.o -lf2c -lm	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,		http://www.netlib.org/f2c/libf2c.zip*/#include "f2c.h"#include "blaswrap.h"/* Table of constant values */static integer c__1 = 1;static integer c__65 = 65;static doublereal c_b18 = -1.;static doublereal c_b31 = 1.;/* Subroutine */ int _starpu_dgbtrf_(integer *m, integer *n, integer *kl, integer *ku, 	 doublereal *ab, integer *ldab, integer *ipiv, integer *info){    /* System generated locals */    integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6;    doublereal d__1;    /* Local variables */    integer i__, j, i2, i3, j2, j3, k2, jb, nb, ii, jj, jm, ip, jp, km, ju, 	    kv, nw;    extern /* Subroutine */ int _starpu_dger_(integer *, integer *, doublereal *, 	    doublereal *, integer *, doublereal *, integer *, doublereal *, 	    integer *);    doublereal temp;    extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *, 	    integer *), _starpu_dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, 	    doublereal *, doublereal *, integer *), _starpu_dcopy_(	    integer *, doublereal *, integer *, doublereal *, integer *), 	    _starpu_dswap_(integer *, doublereal *, integer *, doublereal *, integer *);    doublereal work13[4160]	/* was [65][64] */, work31[4160]	/* 	    was [65][64] */;    extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *, 	    integer *, integer *, doublereal *, doublereal *, integer *, 	    doublereal *, integer *), _starpu_dgbtf2_(	    integer *, integer *, integer *, integer *, doublereal *, integer 	    *, integer *, integer *);    extern integer _starpu_idamax_(integer *, doublereal *, integer *);    extern /* Subroutine */ int _starpu_xerbla_(char *, integer *);    extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *, 	    integer *, integer *);    extern /* Subroutine */ int _starpu_dlaswp_(integer *, doublereal *, integer *, 	    integer *, integer *, integer *, integer *);/*  -- LAPACK routine (version 3.2) -- *//*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. *//*     November 2006 *//*     .. Scalar Arguments .. *//*     .. *//*     .. Array Arguments .. *//*     .. *//*  Purpose *//*  ======= *//*  DGBTRF computes an LU factorization of a real m-by-n band matrix A *//*  using partial pivoting with row interchanges. *//*  This is the blocked version of the algorithm, calling Level 3 BLAS. *//*  Arguments *//*  ========= *//*  M       (input) INTEGER *//*          The number of rows of the matrix A.  M >= 0. *//*  N       (input) INTEGER *//*          The number of columns of the matrix A.  N >= 0. *//*  KL      (input) INTEGER *//*          The number of subdiagonals within the band of A.  KL >= 0. *//*  KU      (input) INTEGER *//*          The number of superdiagonals within the band of A.  KU >= 0. *//*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N) *//*          On entry, the matrix A in band storage, in rows KL+1 to *//*          2*KL+KU+1; rows 1 to KL of the array need not be set. *//*          The j-th column of A is stored in the j-th column of the *//*          array AB as follows: *//*          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) *//*          On exit, details of the factorization: U is stored as an *//*          upper triangular band matrix with KL+KU superdiagonals in *//*          rows 1 to KL+KU+1, and the multipliers used during the *//*          factorization are stored in rows KL+KU+2 to 2*KL+KU+1. *//*          See below for further details. *//*  LDAB    (input) INTEGER *//*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1. *//*  IPIV    (output) INTEGER array, dimension (min(M,N)) *//*          The pivot indices; for 1 <= i <= min(M,N), row i of the *//*          matrix was interchanged with row IPIV(i). *//*  INFO    (output) INTEGER *//*          = 0: successful exit *//*          < 0: if INFO = -i, the i-th argument had an illegal value *//*          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization *//*               has been completed, but the factor U is exactly *//*               singular, and division by zero will occur if it is used *//*               to solve a system of equations. *//*  Further Details *//*  =============== *//*  The band storage scheme is illustrated by the following example, when *//*  M = N = 6, KL = 2, KU = 1: *//*  On entry:                       On exit: *//*      *    *    *    +    +    +       *    *    *   u14  u25  u36 *//*      *    *    +    +    +    +       *    *   u13  u24  u35  u46 *//*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56 *//*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66 *//*     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   * *//*     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    * *//*  Array elements marked * are not used by the routine; elements marked *//*  + need not be set on entry, but are required by the routine to store *//*  elements of U because of fill-in resulting from the row interchanges. *//*  ===================================================================== *//*     .. Parameters .. *//*     .. *//*     .. Local Scalars .. *//*     .. *//*     .. Local Arrays .. *//*     .. *//*     .. External Functions .. *//*     .. *//*     .. External Subroutines .. *//*     .. *//*     .. Intrinsic Functions .. *//*     .. *//*     .. Executable Statements .. *//*     KV is the number of superdiagonals in the factor U, allowing for *//*     fill-in */    /* Parameter adjustments */    ab_dim1 = *ldab;    ab_offset = 1 + ab_dim1;    ab -= ab_offset;    --ipiv;    /* Function Body */    kv = *ku + *kl;/*     Test the input parameters. */    *info = 0;    if (*m < 0) {	*info = -1;    } else if (*n < 0) {	*info = -2;    } else if (*kl < 0) {	*info = -3;    } else if (*ku < 0) {	*info = -4;    } else if (*ldab < *kl + kv + 1) {	*info = -6;    }    if (*info != 0) {	i__1 = -(*info);	_starpu_xerbla_("DGBTRF", &i__1);	return 0;    }/*     Quick return if possible */    if (*m == 0 || *n == 0) {	return 0;    }/*     Determine the block size for this environment */    nb = _starpu_ilaenv_(&c__1, "DGBTRF", " ", m, n, kl, ku);/*     The block size must not exceed the limit set by the size of the *//*     local arrays WORK13 and WORK31. */    nb = min(nb,64);    if (nb <= 1 || nb > *kl) {/*        Use unblocked code */	_starpu_dgbtf2_(m, n, kl, ku, &ab[ab_offset], ldab, &ipiv[1], info);    } else {/*        Use blocked code *//*        Zero the superdiagonal elements of the work array WORK13 */	i__1 = nb;	for (j = 1; j <= i__1; ++j) {	    i__2 = j - 1;	    for (i__ = 1; i__ <= i__2; ++i__) {		work13[i__ + j * 65 - 66] = 0.;/* L10: */	    }/* L20: */	}/*        Zero the subdiagonal elements of the work array WORK31 */	i__1 = nb;	for (j = 1; j <= i__1; ++j) {	    i__2 = nb;	    for (i__ = j + 1; i__ <= i__2; ++i__) {		work31[i__ + j * 65 - 66] = 0.;/* L30: */	    }/* L40: */	}/*        Gaussian elimination with partial pivoting *//*        Set fill-in elements in columns KU+2 to KV to zero */	i__1 = min(kv,*n);	for (j = *ku + 2; j <= i__1; ++j) {	    i__2 = *kl;	    for (i__ = kv - j + 2; i__ <= i__2; ++i__) {		ab[i__ + j * ab_dim1] = 0.;/* L50: */	    }/* L60: */	}/*        JU is the index of the last column affected by the current *//*        stage of the factorization */	ju = 1;	i__1 = min(*m,*n);	i__2 = nb;	for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {/* Computing MIN */	    i__3 = nb, i__4 = min(*m,*n) - j + 1;	    jb = min(i__3,i__4);/*           The active part of the matrix is partitioned *//*              A11   A12   A13 *//*              A21   A22   A23 *//*              A31   A32   A33 *//*           Here A11, A21 and A31 denote the current block of JB columns *//*           which is about to be factorized. The number of rows in the *//*           partitioning are JB, I2, I3 respectively, and the numbers *//*           of columns are JB, J2, J3. The superdiagonal elements of A13 *//*           and the subdiagonal elements of A31 lie outside the band. *//* Computing MIN */	    i__3 = *kl - jb, i__4 = *m - j - jb + 1;	    i2 = min(i__3,i__4);/* Computing MIN */	    i__3 = jb, i__4 = *m - j - *kl + 1;	    i3 = min(i__3,i__4);/*           J2 and J3 are computed after JU has been updated. *//*           Factorize the current block of JB columns */	    i__3 = j + jb - 1;	    for (jj = j; jj <= i__3; ++jj) {/*              Set fill-in elements in column JJ+KV to zero */		if (jj + kv <= *n) {		    i__4 = *kl;		    for (i__ = 1; i__ <= i__4; ++i__) {			ab[i__ + (jj + kv) * ab_dim1] = 0.;/* L70: */		    }		}/*              Find pivot and test for singularity. KM is the number of *//*              subdiagonal elements in the current column. *//* Computing MIN */		i__4 = *kl, i__5 = *m - jj;		km = min(i__4,i__5);		i__4 = km + 1;		jp = _starpu_idamax_(&i__4, &ab[kv + 1 + jj * ab_dim1], &c__1);		ipiv[jj] = jp + jj - j;		if (ab[kv + jp + jj * ab_dim1] != 0.) {/* Computing MAX *//* Computing MIN */		    i__6 = jj + *ku + jp - 1;		    i__4 = ju, i__5 = min(i__6,*n);		    ju = max(i__4,i__5);		    if (jp != 1) {/*                    Apply interchange to columns J to J+JB-1 */			if (jp + jj - 1 < j + *kl) {			    i__4 = *ldab - 1;			    i__5 = *ldab - 1;			    _starpu_dswap_(&jb, &ab[kv + 1 + jj - j + j * ab_dim1], &				    i__4, &ab[kv + jp + jj - j + j * ab_dim1], 				     &i__5);			} else {/*                       The interchange affects columns J to JJ-1 of A31 *//*                       which are stored in the work array WORK31 */			    i__4 = jj - j;			    i__5 = *ldab - 1;			    _starpu_dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], 				    &i__5, &work31[jp + jj - j - *kl - 1], &				    c__65);			    i__4 = j + jb - jj;			    i__5 = *ldab - 1;			    i__6 = *ldab - 1;			    _starpu_dswap_(&i__4, &ab[kv + 1 + jj * ab_dim1], &i__5, &				    ab[kv + jp + jj * ab_dim1], &i__6);			}		    }/*                 Compute multipliers */		    d__1 = 1. / ab[kv + 1 + jj * ab_dim1];		    _starpu_dscal_(&km, &d__1, &ab[kv + 2 + jj * ab_dim1], &c__1);/*                 Update trailing submatrix within the band and within *//*                 the current block. JM is the index of the last column *//*                 which needs to be updated. *//* Computing MIN */		    i__4 = ju, i__5 = j + jb - 1;		    jm = min(i__4,i__5);		    if (jm > jj) {			i__4 = jm - jj;			i__5 = *ldab - 1;			i__6 = *ldab - 1;			_starpu_dger_(&km, &i__4, &c_b18, &ab[kv + 2 + jj * ab_dim1], 				&c__1, &ab[kv + (jj + 1) * ab_dim1], &i__5, &				ab[kv + 1 + (jj + 1) * ab_dim1], &i__6);		    }		} else {/*                 If pivot is zero, set INFO to the index of the pivot *//*                 unless a zero pivot has already been found. */		    if (*info == 0) {			*info = jj;		    }		}/*              Copy current column of A31 into the work array WORK31 *//* Computing MIN */		i__4 = jj - j + 1;		nw = min(i__4,i3);		if (nw > 0) {		    _starpu_dcopy_(&nw, &ab[kv + *kl + 1 - jj + j + jj * ab_dim1], &			    c__1, &work31[(jj - j + 1) * 65 - 65], &c__1);		}/* L80: */	    }	    if (j + jb <= *n) {/*              Apply the row interchanges to the other blocks. *//* Computing MIN */		i__3 = ju - j + 1;		j2 = min(i__3,kv) - jb;/* Computing MAX */		i__3 = 0, i__4 = ju - j - kv + 1;		j3 = max(i__3,i__4);/*              Use DLASWP to apply the row interchanges to A12, A22, and *//*              A32. */		i__3 = *ldab - 1;		_starpu_dlaswp_(&j2, &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__3, &			c__1, &jb, &ipiv[j], &c__1);/*              Adjust the pivot indices. */		i__3 = j + jb - 1;		for (i__ = j; i__ <= i__3; ++i__) {		    ipiv[i__] = ipiv[i__] + j - 1;/* L90: */		}/*              Apply the row interchanges to A13, A23, and A33 *//*              columnwise. */		k2 = j - 1 + jb + j2;		i__3 = j3;		for (i__ = 1; i__ <= i__3; ++i__) {		    jj = k2 + i__;		    i__4 = j + jb - 1;		    for (ii = j + i__ - 1; ii <= i__4; ++ii) {			ip = ipiv[ii];			if (ip != ii) {			    temp = ab[kv + 1 + ii - jj + jj * ab_dim1];			    ab[kv + 1 + ii - jj + jj * ab_dim1] = ab[kv + 1 + 				    ip - jj + jj * ab_dim1];			    ab[kv + 1 + ip - jj + jj * ab_dim1] = temp;			}/* L100: */		    }/* L110: */		}/*              Update the relevant part of the trailing submatrix */		if (j2 > 0) {/*                 Update A12 */		    i__3 = *ldab - 1;		    i__4 = *ldab - 1;		    _starpu_dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j2, 			    &c_b31, &ab[kv + 1 + j * ab_dim1], &i__3, &ab[kv 			    + 1 - jb + (j + jb) * ab_dim1], &i__4);		    if (i2 > 0) {/*                    Update A22 */			i__3 = *ldab - 1;			i__4 = *ldab - 1;			i__5 = *ldab - 1;			_starpu_dgemm_("No transpose", "No transpose", &i2, &j2, &jb, 				&c_b18, &ab[kv + 1 + jb + j * ab_dim1], &i__3, 				 &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__4, 				 &c_b31, &ab[kv + 1 + (j + jb) * ab_dim1], &				i__5);		    }		    if (i3 > 0) {/*                    Update A32 */			i__3 = *ldab - 1;			i__4 = *ldab - 1;			_starpu_dgemm_("No transpose", "No transpose", &i3, &j2, &jb, 				&c_b18, work31, &c__65, &ab[kv + 1 - jb + (j 				+ jb) * ab_dim1], &i__3, &c_b31, &ab[kv + *kl 				+ 1 - jb + (j + jb) * ab_dim1], &i__4);		    }		}		if (j3 > 0) {/*                 Copy the lower triangle of A13 into the work array *//*                 WORK13 */		    i__3 = j3;		    for (jj = 1; jj <= i__3; ++jj) {			i__4 = jb;			for (ii = jj; ii <= i__4; ++ii) {			    work13[ii + jj * 65 - 66] = ab[ii - jj + 1 + (jj 				    + j + kv - 1) * ab_dim1];/* L120: */			}/* L130: */		    }/*                 Update A13 in the work array */		    i__3 = *ldab - 1;		    _starpu_dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j3, 			    &c_b31, &ab[kv + 1 + j * ab_dim1], &i__3, work13, 			    &c__65);		    if (i2 > 0) {/*                    Update A23 */			i__3 = *ldab - 1;			i__4 = *ldab - 1;			_starpu_dgemm_("No transpose", "No transpose", &i2, &j3, &jb, 				&c_b18, &ab[kv + 1 + jb + j * ab_dim1], &i__3, 				 work13, &c__65, &c_b31, &ab[jb + 1 + (j + kv)				 * ab_dim1], &i__4);		    }		    if (i3 > 0) {/*                    Update A33 */			i__3 = *ldab - 1;			_starpu_dgemm_("No transpose", "No transpose", &i3, &j3, &jb, 				&c_b18, work31, &c__65, work13, &c__65, &				c_b31, &ab[*kl + 1 + (j + kv) * ab_dim1], &				i__3);		    }/*                 Copy the lower triangle of A13 back into place */		    i__3 = j3;		    for (jj = 1; jj <= i__3; ++jj) {			i__4 = jb;			for (ii = jj; ii <= i__4; ++ii) {			    ab[ii - jj + 1 + (jj + j + kv - 1) * ab_dim1] = 				    work13[ii + jj * 65 - 66];/* L140: */			}/* L150: */		    }		}	    } else {/*              Adjust the pivot indices. */		i__3 = j + jb - 1;		for (i__ = j; i__ <= i__3; ++i__) {		    ipiv[i__] = ipiv[i__] + j - 1;/* L160: */		}	    }/*           Partially undo the interchanges in the current block to *//*           restore the upper triangular form of A31 and copy the upper *//*           triangle of A31 back into place */	    i__3 = j;	    for (jj = j + jb - 1; jj >= i__3; --jj) {		jp = ipiv[jj] - jj + 1;		if (jp != 1) {/*                 Apply interchange to columns J to JJ-1 */		    if (jp + jj - 1 < j + *kl) {/*                    The interchange does not affect A31 */			i__4 = jj - j;			i__5 = *ldab - 1;			i__6 = *ldab - 1;			_starpu_dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], &				i__5, &ab[kv + jp + jj - j + j * ab_dim1], &				i__6);		    } else {/*                    The interchange does affect A31 */			i__4 = jj - j;			i__5 = *ldab - 1;			_starpu_dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], &				i__5, &work31[jp + jj - j - *kl - 1], &c__65);		    }		}/*              Copy the current column of A31 back into place *//* Computing MIN */		i__4 = i3, i__5 = jj - j + 1;		nw = min(i__4,i__5);		if (nw > 0) {		    _starpu_dcopy_(&nw, &work31[(jj - j + 1) * 65 - 65], &c__1, &ab[			    kv + *kl + 1 - jj + j + jj * ab_dim1], &c__1);		}/* L170: */	    }/* L180: */	}    }    return 0;/*     End of DGBTRF */} /* _starpu_dgbtrf_ */
 |