123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375 |
- /* dlaed6.f -- translated by f2c (version 20061008).
- You must link the resulting object file with libf2c:
- on Microsoft Windows system, link with libf2c.lib;
- on Linux or Unix systems, link with .../path/to/libf2c.a -lm
- or, if you install libf2c.a in a standard place, with -lf2c -lm
- -- in that order, at the end of the command line, as in
- cc *.o -lf2c -lm
- Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
- http://www.netlib.org/f2c/libf2c.zip
- */
- #include "f2c.h"
- #include "blaswrap.h"
- /* Subroutine */ int _starpu_dlaed6_(integer *kniter, logical *orgati, doublereal *
- rho, doublereal *d__, doublereal *z__, doublereal *finit, doublereal *
- tau, integer *info)
- {
- /* System generated locals */
- integer i__1;
- doublereal d__1, d__2, d__3, d__4;
- /* Builtin functions */
- double sqrt(doublereal), log(doublereal), pow_di(doublereal *, integer *);
- /* Local variables */
- doublereal a, b, c__, f;
- integer i__;
- doublereal fc, df, ddf, lbd, eta, ubd, eps, base;
- integer iter;
- doublereal temp, temp1, temp2, temp3, temp4;
- logical scale;
- integer niter;
- doublereal small1, small2, sminv1, sminv2;
- extern doublereal _starpu_dlamch_(char *);
- doublereal dscale[3], sclfac, zscale[3], erretm, sclinv;
- /* -- LAPACK routine (version 3.2) -- */
- /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
- /* February 2007 */
- /* .. Scalar Arguments .. */
- /* .. */
- /* .. Array Arguments .. */
- /* .. */
- /* Purpose */
- /* ======= */
- /* DLAED6 computes the positive or negative root (closest to the origin) */
- /* of */
- /* z(1) z(2) z(3) */
- /* f(x) = rho + --------- + ---------- + --------- */
- /* d(1)-x d(2)-x d(3)-x */
- /* It is assumed that */
- /* if ORGATI = .true. the root is between d(2) and d(3); */
- /* otherwise it is between d(1) and d(2) */
- /* This routine will be called by DLAED4 when necessary. In most cases, */
- /* the root sought is the smallest in magnitude, though it might not be */
- /* in some extremely rare situations. */
- /* Arguments */
- /* ========= */
- /* KNITER (input) INTEGER */
- /* Refer to DLAED4 for its significance. */
- /* ORGATI (input) LOGICAL */
- /* If ORGATI is true, the needed root is between d(2) and */
- /* d(3); otherwise it is between d(1) and d(2). See */
- /* DLAED4 for further details. */
- /* RHO (input) DOUBLE PRECISION */
- /* Refer to the equation f(x) above. */
- /* D (input) DOUBLE PRECISION array, dimension (3) */
- /* D satisfies d(1) < d(2) < d(3). */
- /* Z (input) DOUBLE PRECISION array, dimension (3) */
- /* Each of the elements in z must be positive. */
- /* FINIT (input) DOUBLE PRECISION */
- /* The value of f at 0. It is more accurate than the one */
- /* evaluated inside this routine (if someone wants to do */
- /* so). */
- /* TAU (output) DOUBLE PRECISION */
- /* The root of the equation f(x). */
- /* INFO (output) INTEGER */
- /* = 0: successful exit */
- /* > 0: if INFO = 1, failure to converge */
- /* Further Details */
- /* =============== */
- /* 30/06/99: Based on contributions by */
- /* Ren-Cang Li, Computer Science Division, University of California */
- /* at Berkeley, USA */
- /* 10/02/03: This version has a few statements commented out for thread */
- /* safety (machine parameters are computed on each entry). SJH. */
- /* 05/10/06: Modified from a new version of Ren-Cang Li, use */
- /* Gragg-Thornton-Warner cubic convergent scheme for better stability. */
- /* ===================================================================== */
- /* .. Parameters .. */
- /* .. */
- /* .. External Functions .. */
- /* .. */
- /* .. Local Arrays .. */
- /* .. */
- /* .. Local Scalars .. */
- /* .. */
- /* .. Intrinsic Functions .. */
- /* .. */
- /* .. Executable Statements .. */
- /* Parameter adjustments */
- --z__;
- --d__;
- /* Function Body */
- *info = 0;
- if (*orgati) {
- lbd = d__[2];
- ubd = d__[3];
- } else {
- lbd = d__[1];
- ubd = d__[2];
- }
- if (*finit < 0.) {
- lbd = 0.;
- } else {
- ubd = 0.;
- }
- niter = 1;
- *tau = 0.;
- if (*kniter == 2) {
- if (*orgati) {
- temp = (d__[3] - d__[2]) / 2.;
- c__ = *rho + z__[1] / (d__[1] - d__[2] - temp);
- a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3];
- b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2];
- } else {
- temp = (d__[1] - d__[2]) / 2.;
- c__ = *rho + z__[3] / (d__[3] - d__[2] - temp);
- a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2];
- b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1];
- }
- /* Computing MAX */
- d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__);
- temp = max(d__1,d__2);
- a /= temp;
- b /= temp;
- c__ /= temp;
- if (c__ == 0.) {
- *tau = b / a;
- } else if (a <= 0.) {
- *tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (
- c__ * 2.);
- } else {
- *tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))
- ));
- }
- if (*tau < lbd || *tau > ubd) {
- *tau = (lbd + ubd) / 2.;
- }
- if (d__[1] == *tau || d__[2] == *tau || d__[3] == *tau) {
- *tau = 0.;
- } else {
- temp = *finit + *tau * z__[1] / (d__[1] * (d__[1] - *tau)) + *tau
- * z__[2] / (d__[2] * (d__[2] - *tau)) + *tau * z__[3] / (
- d__[3] * (d__[3] - *tau));
- if (temp <= 0.) {
- lbd = *tau;
- } else {
- ubd = *tau;
- }
- if (abs(*finit) <= abs(temp)) {
- *tau = 0.;
- }
- }
- }
- /* get machine parameters for possible scaling to avoid overflow */
- /* modified by Sven: parameters SMALL1, SMINV1, SMALL2, */
- /* SMINV2, EPS are not SAVEd anymore between one call to the */
- /* others but recomputed at each call */
- eps = _starpu_dlamch_("Epsilon");
- base = _starpu_dlamch_("Base");
- i__1 = (integer) (log(_starpu_dlamch_("SafMin")) / log(base) / 3.);
- small1 = pow_di(&base, &i__1);
- sminv1 = 1. / small1;
- small2 = small1 * small1;
- sminv2 = sminv1 * sminv1;
- /* Determine if scaling of inputs necessary to avoid overflow */
- /* when computing 1/TEMP**3 */
- if (*orgati) {
- /* Computing MIN */
- d__3 = (d__1 = d__[2] - *tau, abs(d__1)), d__4 = (d__2 = d__[3] - *
- tau, abs(d__2));
- temp = min(d__3,d__4);
- } else {
- /* Computing MIN */
- d__3 = (d__1 = d__[1] - *tau, abs(d__1)), d__4 = (d__2 = d__[2] - *
- tau, abs(d__2));
- temp = min(d__3,d__4);
- }
- scale = FALSE_;
- if (temp <= small1) {
- scale = TRUE_;
- if (temp <= small2) {
- /* Scale up by power of radix nearest 1/SAFMIN**(2/3) */
- sclfac = sminv2;
- sclinv = small2;
- } else {
- /* Scale up by power of radix nearest 1/SAFMIN**(1/3) */
- sclfac = sminv1;
- sclinv = small1;
- }
- /* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) */
- for (i__ = 1; i__ <= 3; ++i__) {
- dscale[i__ - 1] = d__[i__] * sclfac;
- zscale[i__ - 1] = z__[i__] * sclfac;
- /* L10: */
- }
- *tau *= sclfac;
- lbd *= sclfac;
- ubd *= sclfac;
- } else {
- /* Copy D and Z to DSCALE and ZSCALE */
- for (i__ = 1; i__ <= 3; ++i__) {
- dscale[i__ - 1] = d__[i__];
- zscale[i__ - 1] = z__[i__];
- /* L20: */
- }
- }
- fc = 0.;
- df = 0.;
- ddf = 0.;
- for (i__ = 1; i__ <= 3; ++i__) {
- temp = 1. / (dscale[i__ - 1] - *tau);
- temp1 = zscale[i__ - 1] * temp;
- temp2 = temp1 * temp;
- temp3 = temp2 * temp;
- fc += temp1 / dscale[i__ - 1];
- df += temp2;
- ddf += temp3;
- /* L30: */
- }
- f = *finit + *tau * fc;
- if (abs(f) <= 0.) {
- goto L60;
- }
- if (f <= 0.) {
- lbd = *tau;
- } else {
- ubd = *tau;
- }
- /* Iteration begins -- Use Gragg-Thornton-Warner cubic convergent */
- /* scheme */
- /* It is not hard to see that */
- /* 1) Iterations will go up monotonically */
- /* if FINIT < 0; */
- /* 2) Iterations will go down monotonically */
- /* if FINIT > 0. */
- iter = niter + 1;
- for (niter = iter; niter <= 40; ++niter) {
- if (*orgati) {
- temp1 = dscale[1] - *tau;
- temp2 = dscale[2] - *tau;
- } else {
- temp1 = dscale[0] - *tau;
- temp2 = dscale[1] - *tau;
- }
- a = (temp1 + temp2) * f - temp1 * temp2 * df;
- b = temp1 * temp2 * f;
- c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf;
- /* Computing MAX */
- d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__);
- temp = max(d__1,d__2);
- a /= temp;
- b /= temp;
- c__ /= temp;
- if (c__ == 0.) {
- eta = b / a;
- } else if (a <= 0.) {
- eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__
- * 2.);
- } else {
- eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))
- );
- }
- if (f * eta >= 0.) {
- eta = -f / df;
- }
- *tau += eta;
- if (*tau < lbd || *tau > ubd) {
- *tau = (lbd + ubd) / 2.;
- }
- fc = 0.;
- erretm = 0.;
- df = 0.;
- ddf = 0.;
- for (i__ = 1; i__ <= 3; ++i__) {
- temp = 1. / (dscale[i__ - 1] - *tau);
- temp1 = zscale[i__ - 1] * temp;
- temp2 = temp1 * temp;
- temp3 = temp2 * temp;
- temp4 = temp1 / dscale[i__ - 1];
- fc += temp4;
- erretm += abs(temp4);
- df += temp2;
- ddf += temp3;
- /* L40: */
- }
- f = *finit + *tau * fc;
- erretm = (abs(*finit) + abs(*tau) * erretm) * 8. + abs(*tau) * df;
- if (abs(f) <= eps * erretm) {
- goto L60;
- }
- if (f <= 0.) {
- lbd = *tau;
- } else {
- ubd = *tau;
- }
- /* L50: */
- }
- *info = 1;
- L60:
- /* Undo scaling */
- if (scale) {
- *tau *= sclinv;
- }
- return 0;
- /* End of DLAED6 */
- } /* _starpu_dlaed6_ */
|