dlaed6.c 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375
  1. /* dlaed6.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Subroutine */ int _starpu_dlaed6_(integer *kniter, logical *orgati, doublereal *
  14. rho, doublereal *d__, doublereal *z__, doublereal *finit, doublereal *
  15. tau, integer *info)
  16. {
  17. /* System generated locals */
  18. integer i__1;
  19. doublereal d__1, d__2, d__3, d__4;
  20. /* Builtin functions */
  21. double sqrt(doublereal), log(doublereal), pow_di(doublereal *, integer *);
  22. /* Local variables */
  23. doublereal a, b, c__, f;
  24. integer i__;
  25. doublereal fc, df, ddf, lbd, eta, ubd, eps, base;
  26. integer iter;
  27. doublereal temp, temp1, temp2, temp3, temp4;
  28. logical scale;
  29. integer niter;
  30. doublereal small1, small2, sminv1, sminv2;
  31. extern doublereal _starpu_dlamch_(char *);
  32. doublereal dscale[3], sclfac, zscale[3], erretm, sclinv;
  33. /* -- LAPACK routine (version 3.2) -- */
  34. /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  35. /* February 2007 */
  36. /* .. Scalar Arguments .. */
  37. /* .. */
  38. /* .. Array Arguments .. */
  39. /* .. */
  40. /* Purpose */
  41. /* ======= */
  42. /* DLAED6 computes the positive or negative root (closest to the origin) */
  43. /* of */
  44. /* z(1) z(2) z(3) */
  45. /* f(x) = rho + --------- + ---------- + --------- */
  46. /* d(1)-x d(2)-x d(3)-x */
  47. /* It is assumed that */
  48. /* if ORGATI = .true. the root is between d(2) and d(3); */
  49. /* otherwise it is between d(1) and d(2) */
  50. /* This routine will be called by DLAED4 when necessary. In most cases, */
  51. /* the root sought is the smallest in magnitude, though it might not be */
  52. /* in some extremely rare situations. */
  53. /* Arguments */
  54. /* ========= */
  55. /* KNITER (input) INTEGER */
  56. /* Refer to DLAED4 for its significance. */
  57. /* ORGATI (input) LOGICAL */
  58. /* If ORGATI is true, the needed root is between d(2) and */
  59. /* d(3); otherwise it is between d(1) and d(2). See */
  60. /* DLAED4 for further details. */
  61. /* RHO (input) DOUBLE PRECISION */
  62. /* Refer to the equation f(x) above. */
  63. /* D (input) DOUBLE PRECISION array, dimension (3) */
  64. /* D satisfies d(1) < d(2) < d(3). */
  65. /* Z (input) DOUBLE PRECISION array, dimension (3) */
  66. /* Each of the elements in z must be positive. */
  67. /* FINIT (input) DOUBLE PRECISION */
  68. /* The value of f at 0. It is more accurate than the one */
  69. /* evaluated inside this routine (if someone wants to do */
  70. /* so). */
  71. /* TAU (output) DOUBLE PRECISION */
  72. /* The root of the equation f(x). */
  73. /* INFO (output) INTEGER */
  74. /* = 0: successful exit */
  75. /* > 0: if INFO = 1, failure to converge */
  76. /* Further Details */
  77. /* =============== */
  78. /* 30/06/99: Based on contributions by */
  79. /* Ren-Cang Li, Computer Science Division, University of California */
  80. /* at Berkeley, USA */
  81. /* 10/02/03: This version has a few statements commented out for thread */
  82. /* safety (machine parameters are computed on each entry). SJH. */
  83. /* 05/10/06: Modified from a new version of Ren-Cang Li, use */
  84. /* Gragg-Thornton-Warner cubic convergent scheme for better stability. */
  85. /* ===================================================================== */
  86. /* .. Parameters .. */
  87. /* .. */
  88. /* .. External Functions .. */
  89. /* .. */
  90. /* .. Local Arrays .. */
  91. /* .. */
  92. /* .. Local Scalars .. */
  93. /* .. */
  94. /* .. Intrinsic Functions .. */
  95. /* .. */
  96. /* .. Executable Statements .. */
  97. /* Parameter adjustments */
  98. --z__;
  99. --d__;
  100. /* Function Body */
  101. *info = 0;
  102. if (*orgati) {
  103. lbd = d__[2];
  104. ubd = d__[3];
  105. } else {
  106. lbd = d__[1];
  107. ubd = d__[2];
  108. }
  109. if (*finit < 0.) {
  110. lbd = 0.;
  111. } else {
  112. ubd = 0.;
  113. }
  114. niter = 1;
  115. *tau = 0.;
  116. if (*kniter == 2) {
  117. if (*orgati) {
  118. temp = (d__[3] - d__[2]) / 2.;
  119. c__ = *rho + z__[1] / (d__[1] - d__[2] - temp);
  120. a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3];
  121. b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2];
  122. } else {
  123. temp = (d__[1] - d__[2]) / 2.;
  124. c__ = *rho + z__[3] / (d__[3] - d__[2] - temp);
  125. a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2];
  126. b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1];
  127. }
  128. /* Computing MAX */
  129. d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__);
  130. temp = max(d__1,d__2);
  131. a /= temp;
  132. b /= temp;
  133. c__ /= temp;
  134. if (c__ == 0.) {
  135. *tau = b / a;
  136. } else if (a <= 0.) {
  137. *tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (
  138. c__ * 2.);
  139. } else {
  140. *tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))
  141. ));
  142. }
  143. if (*tau < lbd || *tau > ubd) {
  144. *tau = (lbd + ubd) / 2.;
  145. }
  146. if (d__[1] == *tau || d__[2] == *tau || d__[3] == *tau) {
  147. *tau = 0.;
  148. } else {
  149. temp = *finit + *tau * z__[1] / (d__[1] * (d__[1] - *tau)) + *tau
  150. * z__[2] / (d__[2] * (d__[2] - *tau)) + *tau * z__[3] / (
  151. d__[3] * (d__[3] - *tau));
  152. if (temp <= 0.) {
  153. lbd = *tau;
  154. } else {
  155. ubd = *tau;
  156. }
  157. if (abs(*finit) <= abs(temp)) {
  158. *tau = 0.;
  159. }
  160. }
  161. }
  162. /* get machine parameters for possible scaling to avoid overflow */
  163. /* modified by Sven: parameters SMALL1, SMINV1, SMALL2, */
  164. /* SMINV2, EPS are not SAVEd anymore between one call to the */
  165. /* others but recomputed at each call */
  166. eps = _starpu_dlamch_("Epsilon");
  167. base = _starpu_dlamch_("Base");
  168. i__1 = (integer) (log(_starpu_dlamch_("SafMin")) / log(base) / 3.);
  169. small1 = pow_di(&base, &i__1);
  170. sminv1 = 1. / small1;
  171. small2 = small1 * small1;
  172. sminv2 = sminv1 * sminv1;
  173. /* Determine if scaling of inputs necessary to avoid overflow */
  174. /* when computing 1/TEMP**3 */
  175. if (*orgati) {
  176. /* Computing MIN */
  177. d__3 = (d__1 = d__[2] - *tau, abs(d__1)), d__4 = (d__2 = d__[3] - *
  178. tau, abs(d__2));
  179. temp = min(d__3,d__4);
  180. } else {
  181. /* Computing MIN */
  182. d__3 = (d__1 = d__[1] - *tau, abs(d__1)), d__4 = (d__2 = d__[2] - *
  183. tau, abs(d__2));
  184. temp = min(d__3,d__4);
  185. }
  186. scale = FALSE_;
  187. if (temp <= small1) {
  188. scale = TRUE_;
  189. if (temp <= small2) {
  190. /* Scale up by power of radix nearest 1/SAFMIN**(2/3) */
  191. sclfac = sminv2;
  192. sclinv = small2;
  193. } else {
  194. /* Scale up by power of radix nearest 1/SAFMIN**(1/3) */
  195. sclfac = sminv1;
  196. sclinv = small1;
  197. }
  198. /* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) */
  199. for (i__ = 1; i__ <= 3; ++i__) {
  200. dscale[i__ - 1] = d__[i__] * sclfac;
  201. zscale[i__ - 1] = z__[i__] * sclfac;
  202. /* L10: */
  203. }
  204. *tau *= sclfac;
  205. lbd *= sclfac;
  206. ubd *= sclfac;
  207. } else {
  208. /* Copy D and Z to DSCALE and ZSCALE */
  209. for (i__ = 1; i__ <= 3; ++i__) {
  210. dscale[i__ - 1] = d__[i__];
  211. zscale[i__ - 1] = z__[i__];
  212. /* L20: */
  213. }
  214. }
  215. fc = 0.;
  216. df = 0.;
  217. ddf = 0.;
  218. for (i__ = 1; i__ <= 3; ++i__) {
  219. temp = 1. / (dscale[i__ - 1] - *tau);
  220. temp1 = zscale[i__ - 1] * temp;
  221. temp2 = temp1 * temp;
  222. temp3 = temp2 * temp;
  223. fc += temp1 / dscale[i__ - 1];
  224. df += temp2;
  225. ddf += temp3;
  226. /* L30: */
  227. }
  228. f = *finit + *tau * fc;
  229. if (abs(f) <= 0.) {
  230. goto L60;
  231. }
  232. if (f <= 0.) {
  233. lbd = *tau;
  234. } else {
  235. ubd = *tau;
  236. }
  237. /* Iteration begins -- Use Gragg-Thornton-Warner cubic convergent */
  238. /* scheme */
  239. /* It is not hard to see that */
  240. /* 1) Iterations will go up monotonically */
  241. /* if FINIT < 0; */
  242. /* 2) Iterations will go down monotonically */
  243. /* if FINIT > 0. */
  244. iter = niter + 1;
  245. for (niter = iter; niter <= 40; ++niter) {
  246. if (*orgati) {
  247. temp1 = dscale[1] - *tau;
  248. temp2 = dscale[2] - *tau;
  249. } else {
  250. temp1 = dscale[0] - *tau;
  251. temp2 = dscale[1] - *tau;
  252. }
  253. a = (temp1 + temp2) * f - temp1 * temp2 * df;
  254. b = temp1 * temp2 * f;
  255. c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf;
  256. /* Computing MAX */
  257. d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__);
  258. temp = max(d__1,d__2);
  259. a /= temp;
  260. b /= temp;
  261. c__ /= temp;
  262. if (c__ == 0.) {
  263. eta = b / a;
  264. } else if (a <= 0.) {
  265. eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__
  266. * 2.);
  267. } else {
  268. eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))
  269. );
  270. }
  271. if (f * eta >= 0.) {
  272. eta = -f / df;
  273. }
  274. *tau += eta;
  275. if (*tau < lbd || *tau > ubd) {
  276. *tau = (lbd + ubd) / 2.;
  277. }
  278. fc = 0.;
  279. erretm = 0.;
  280. df = 0.;
  281. ddf = 0.;
  282. for (i__ = 1; i__ <= 3; ++i__) {
  283. temp = 1. / (dscale[i__ - 1] - *tau);
  284. temp1 = zscale[i__ - 1] * temp;
  285. temp2 = temp1 * temp;
  286. temp3 = temp2 * temp;
  287. temp4 = temp1 / dscale[i__ - 1];
  288. fc += temp4;
  289. erretm += abs(temp4);
  290. df += temp2;
  291. ddf += temp3;
  292. /* L40: */
  293. }
  294. f = *finit + *tau * fc;
  295. erretm = (abs(*finit) + abs(*tau) * erretm) * 8. + abs(*tau) * df;
  296. if (abs(f) <= eps * erretm) {
  297. goto L60;
  298. }
  299. if (f <= 0.) {
  300. lbd = *tau;
  301. } else {
  302. ubd = *tau;
  303. }
  304. /* L50: */
  305. }
  306. *info = 1;
  307. L60:
  308. /* Undo scaling */
  309. if (scale) {
  310. *tau *= sclinv;
  311. }
  312. return 0;
  313. /* End of DLAED6 */
  314. } /* _starpu_dlaed6_ */