dlaed4.c 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955
  1. /* dlaed4.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Subroutine */ int _starpu_dlaed4_(integer *n, integer *i__, doublereal *d__,
  14. doublereal *z__, doublereal *delta, doublereal *rho, doublereal *dlam,
  15. integer *info)
  16. {
  17. /* System generated locals */
  18. integer i__1;
  19. doublereal d__1;
  20. /* Builtin functions */
  21. double sqrt(doublereal);
  22. /* Local variables */
  23. doublereal a, b, c__;
  24. integer j;
  25. doublereal w;
  26. integer ii;
  27. doublereal dw, zz[3];
  28. integer ip1;
  29. doublereal del, eta, phi, eps, tau, psi;
  30. integer iim1, iip1;
  31. doublereal dphi, dpsi;
  32. integer iter;
  33. doublereal temp, prew, temp1, dltlb, dltub, midpt;
  34. integer niter;
  35. logical swtch;
  36. extern /* Subroutine */ int _starpu_dlaed5_(integer *, doublereal *, doublereal *,
  37. doublereal *, doublereal *, doublereal *), _starpu_dlaed6_(integer *,
  38. logical *, doublereal *, doublereal *, doublereal *, doublereal *,
  39. doublereal *, integer *);
  40. logical swtch3;
  41. extern doublereal _starpu_dlamch_(char *);
  42. logical orgati;
  43. doublereal erretm, rhoinv;
  44. /* -- LAPACK routine (version 3.2) -- */
  45. /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  46. /* November 2006 */
  47. /* .. Scalar Arguments .. */
  48. /* .. */
  49. /* .. Array Arguments .. */
  50. /* .. */
  51. /* Purpose */
  52. /* ======= */
  53. /* This subroutine computes the I-th updated eigenvalue of a symmetric */
  54. /* rank-one modification to a diagonal matrix whose elements are */
  55. /* given in the array d, and that */
  56. /* D(i) < D(j) for i < j */
  57. /* and that RHO > 0. This is arranged by the calling routine, and is */
  58. /* no loss in generality. The rank-one modified system is thus */
  59. /* diag( D ) + RHO * Z * Z_transpose. */
  60. /* where we assume the Euclidean norm of Z is 1. */
  61. /* The method consists of approximating the rational functions in the */
  62. /* secular equation by simpler interpolating rational functions. */
  63. /* Arguments */
  64. /* ========= */
  65. /* N (input) INTEGER */
  66. /* The length of all arrays. */
  67. /* I (input) INTEGER */
  68. /* The index of the eigenvalue to be computed. 1 <= I <= N. */
  69. /* D (input) DOUBLE PRECISION array, dimension (N) */
  70. /* The original eigenvalues. It is assumed that they are in */
  71. /* order, D(I) < D(J) for I < J. */
  72. /* Z (input) DOUBLE PRECISION array, dimension (N) */
  73. /* The components of the updating vector. */
  74. /* DELTA (output) DOUBLE PRECISION array, dimension (N) */
  75. /* If N .GT. 2, DELTA contains (D(j) - lambda_I) in its j-th */
  76. /* component. If N = 1, then DELTA(1) = 1. If N = 2, see DLAED5 */
  77. /* for detail. The vector DELTA contains the information necessary */
  78. /* to construct the eigenvectors by DLAED3 and DLAED9. */
  79. /* RHO (input) DOUBLE PRECISION */
  80. /* The scalar in the symmetric updating formula. */
  81. /* DLAM (output) DOUBLE PRECISION */
  82. /* The computed lambda_I, the I-th updated eigenvalue. */
  83. /* INFO (output) INTEGER */
  84. /* = 0: successful exit */
  85. /* > 0: if INFO = 1, the updating process failed. */
  86. /* Internal Parameters */
  87. /* =================== */
  88. /* Logical variable ORGATI (origin-at-i?) is used for distinguishing */
  89. /* whether D(i) or D(i+1) is treated as the origin. */
  90. /* ORGATI = .true. origin at i */
  91. /* ORGATI = .false. origin at i+1 */
  92. /* Logical variable SWTCH3 (switch-for-3-poles?) is for noting */
  93. /* if we are working with THREE poles! */
  94. /* MAXIT is the maximum number of iterations allowed for each */
  95. /* eigenvalue. */
  96. /* Further Details */
  97. /* =============== */
  98. /* Based on contributions by */
  99. /* Ren-Cang Li, Computer Science Division, University of California */
  100. /* at Berkeley, USA */
  101. /* ===================================================================== */
  102. /* .. Parameters .. */
  103. /* .. */
  104. /* .. Local Scalars .. */
  105. /* .. */
  106. /* .. Local Arrays .. */
  107. /* .. */
  108. /* .. External Functions .. */
  109. /* .. */
  110. /* .. External Subroutines .. */
  111. /* .. */
  112. /* .. Intrinsic Functions .. */
  113. /* .. */
  114. /* .. Executable Statements .. */
  115. /* Since this routine is called in an inner loop, we do no argument */
  116. /* checking. */
  117. /* Quick return for N=1 and 2. */
  118. /* Parameter adjustments */
  119. --delta;
  120. --z__;
  121. --d__;
  122. /* Function Body */
  123. *info = 0;
  124. if (*n == 1) {
  125. /* Presumably, I=1 upon entry */
  126. *dlam = d__[1] + *rho * z__[1] * z__[1];
  127. delta[1] = 1.;
  128. return 0;
  129. }
  130. if (*n == 2) {
  131. _starpu_dlaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam);
  132. return 0;
  133. }
  134. /* Compute machine epsilon */
  135. eps = _starpu_dlamch_("Epsilon");
  136. rhoinv = 1. / *rho;
  137. /* The case I = N */
  138. if (*i__ == *n) {
  139. /* Initialize some basic variables */
  140. ii = *n - 1;
  141. niter = 1;
  142. /* Calculate initial guess */
  143. midpt = *rho / 2.;
  144. /* If ||Z||_2 is not one, then TEMP should be set to */
  145. /* RHO * ||Z||_2^2 / TWO */
  146. i__1 = *n;
  147. for (j = 1; j <= i__1; ++j) {
  148. delta[j] = d__[j] - d__[*i__] - midpt;
  149. /* L10: */
  150. }
  151. psi = 0.;
  152. i__1 = *n - 2;
  153. for (j = 1; j <= i__1; ++j) {
  154. psi += z__[j] * z__[j] / delta[j];
  155. /* L20: */
  156. }
  157. c__ = rhoinv + psi;
  158. w = c__ + z__[ii] * z__[ii] / delta[ii] + z__[*n] * z__[*n] / delta[*
  159. n];
  160. if (w <= 0.) {
  161. temp = z__[*n - 1] * z__[*n - 1] / (d__[*n] - d__[*n - 1] + *rho)
  162. + z__[*n] * z__[*n] / *rho;
  163. if (c__ <= temp) {
  164. tau = *rho;
  165. } else {
  166. del = d__[*n] - d__[*n - 1];
  167. a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]
  168. ;
  169. b = z__[*n] * z__[*n] * del;
  170. if (a < 0.) {
  171. tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a);
  172. } else {
  173. tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.);
  174. }
  175. }
  176. /* It can be proved that */
  177. /* D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO */
  178. dltlb = midpt;
  179. dltub = *rho;
  180. } else {
  181. del = d__[*n] - d__[*n - 1];
  182. a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n];
  183. b = z__[*n] * z__[*n] * del;
  184. if (a < 0.) {
  185. tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a);
  186. } else {
  187. tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.);
  188. }
  189. /* It can be proved that */
  190. /* D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2 */
  191. dltlb = 0.;
  192. dltub = midpt;
  193. }
  194. i__1 = *n;
  195. for (j = 1; j <= i__1; ++j) {
  196. delta[j] = d__[j] - d__[*i__] - tau;
  197. /* L30: */
  198. }
  199. /* Evaluate PSI and the derivative DPSI */
  200. dpsi = 0.;
  201. psi = 0.;
  202. erretm = 0.;
  203. i__1 = ii;
  204. for (j = 1; j <= i__1; ++j) {
  205. temp = z__[j] / delta[j];
  206. psi += z__[j] * temp;
  207. dpsi += temp * temp;
  208. erretm += psi;
  209. /* L40: */
  210. }
  211. erretm = abs(erretm);
  212. /* Evaluate PHI and the derivative DPHI */
  213. temp = z__[*n] / delta[*n];
  214. phi = z__[*n] * temp;
  215. dphi = temp * temp;
  216. erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi
  217. + dphi);
  218. w = rhoinv + phi + psi;
  219. /* Test for convergence */
  220. if (abs(w) <= eps * erretm) {
  221. *dlam = d__[*i__] + tau;
  222. goto L250;
  223. }
  224. if (w <= 0.) {
  225. dltlb = max(dltlb,tau);
  226. } else {
  227. dltub = min(dltub,tau);
  228. }
  229. /* Calculate the new step */
  230. ++niter;
  231. c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi;
  232. a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * (
  233. dpsi + dphi);
  234. b = delta[*n - 1] * delta[*n] * w;
  235. if (c__ < 0.) {
  236. c__ = abs(c__);
  237. }
  238. if (c__ == 0.) {
  239. /* ETA = B/A */
  240. /* ETA = RHO - TAU */
  241. eta = dltub - tau;
  242. } else if (a >= 0.) {
  243. eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__
  244. * 2.);
  245. } else {
  246. eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))
  247. );
  248. }
  249. /* Note, eta should be positive if w is negative, and */
  250. /* eta should be negative otherwise. However, */
  251. /* if for some reason caused by roundoff, eta*w > 0, */
  252. /* we simply use one Newton step instead. This way */
  253. /* will guarantee eta*w < 0. */
  254. if (w * eta > 0.) {
  255. eta = -w / (dpsi + dphi);
  256. }
  257. temp = tau + eta;
  258. if (temp > dltub || temp < dltlb) {
  259. if (w < 0.) {
  260. eta = (dltub - tau) / 2.;
  261. } else {
  262. eta = (dltlb - tau) / 2.;
  263. }
  264. }
  265. i__1 = *n;
  266. for (j = 1; j <= i__1; ++j) {
  267. delta[j] -= eta;
  268. /* L50: */
  269. }
  270. tau += eta;
  271. /* Evaluate PSI and the derivative DPSI */
  272. dpsi = 0.;
  273. psi = 0.;
  274. erretm = 0.;
  275. i__1 = ii;
  276. for (j = 1; j <= i__1; ++j) {
  277. temp = z__[j] / delta[j];
  278. psi += z__[j] * temp;
  279. dpsi += temp * temp;
  280. erretm += psi;
  281. /* L60: */
  282. }
  283. erretm = abs(erretm);
  284. /* Evaluate PHI and the derivative DPHI */
  285. temp = z__[*n] / delta[*n];
  286. phi = z__[*n] * temp;
  287. dphi = temp * temp;
  288. erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi
  289. + dphi);
  290. w = rhoinv + phi + psi;
  291. /* Main loop to update the values of the array DELTA */
  292. iter = niter + 1;
  293. for (niter = iter; niter <= 30; ++niter) {
  294. /* Test for convergence */
  295. if (abs(w) <= eps * erretm) {
  296. *dlam = d__[*i__] + tau;
  297. goto L250;
  298. }
  299. if (w <= 0.) {
  300. dltlb = max(dltlb,tau);
  301. } else {
  302. dltub = min(dltub,tau);
  303. }
  304. /* Calculate the new step */
  305. c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi;
  306. a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] *
  307. (dpsi + dphi);
  308. b = delta[*n - 1] * delta[*n] * w;
  309. if (a >= 0.) {
  310. eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (
  311. c__ * 2.);
  312. } else {
  313. eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(
  314. d__1))));
  315. }
  316. /* Note, eta should be positive if w is negative, and */
  317. /* eta should be negative otherwise. However, */
  318. /* if for some reason caused by roundoff, eta*w > 0, */
  319. /* we simply use one Newton step instead. This way */
  320. /* will guarantee eta*w < 0. */
  321. if (w * eta > 0.) {
  322. eta = -w / (dpsi + dphi);
  323. }
  324. temp = tau + eta;
  325. if (temp > dltub || temp < dltlb) {
  326. if (w < 0.) {
  327. eta = (dltub - tau) / 2.;
  328. } else {
  329. eta = (dltlb - tau) / 2.;
  330. }
  331. }
  332. i__1 = *n;
  333. for (j = 1; j <= i__1; ++j) {
  334. delta[j] -= eta;
  335. /* L70: */
  336. }
  337. tau += eta;
  338. /* Evaluate PSI and the derivative DPSI */
  339. dpsi = 0.;
  340. psi = 0.;
  341. erretm = 0.;
  342. i__1 = ii;
  343. for (j = 1; j <= i__1; ++j) {
  344. temp = z__[j] / delta[j];
  345. psi += z__[j] * temp;
  346. dpsi += temp * temp;
  347. erretm += psi;
  348. /* L80: */
  349. }
  350. erretm = abs(erretm);
  351. /* Evaluate PHI and the derivative DPHI */
  352. temp = z__[*n] / delta[*n];
  353. phi = z__[*n] * temp;
  354. dphi = temp * temp;
  355. erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (
  356. dpsi + dphi);
  357. w = rhoinv + phi + psi;
  358. /* L90: */
  359. }
  360. /* Return with INFO = 1, NITER = MAXIT and not converged */
  361. *info = 1;
  362. *dlam = d__[*i__] + tau;
  363. goto L250;
  364. /* End for the case I = N */
  365. } else {
  366. /* The case for I < N */
  367. niter = 1;
  368. ip1 = *i__ + 1;
  369. /* Calculate initial guess */
  370. del = d__[ip1] - d__[*i__];
  371. midpt = del / 2.;
  372. i__1 = *n;
  373. for (j = 1; j <= i__1; ++j) {
  374. delta[j] = d__[j] - d__[*i__] - midpt;
  375. /* L100: */
  376. }
  377. psi = 0.;
  378. i__1 = *i__ - 1;
  379. for (j = 1; j <= i__1; ++j) {
  380. psi += z__[j] * z__[j] / delta[j];
  381. /* L110: */
  382. }
  383. phi = 0.;
  384. i__1 = *i__ + 2;
  385. for (j = *n; j >= i__1; --j) {
  386. phi += z__[j] * z__[j] / delta[j];
  387. /* L120: */
  388. }
  389. c__ = rhoinv + psi + phi;
  390. w = c__ + z__[*i__] * z__[*i__] / delta[*i__] + z__[ip1] * z__[ip1] /
  391. delta[ip1];
  392. if (w > 0.) {
  393. /* d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 */
  394. /* We choose d(i) as origin. */
  395. orgati = TRUE_;
  396. a = c__ * del + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1];
  397. b = z__[*i__] * z__[*i__] * del;
  398. if (a > 0.) {
  399. tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(
  400. d__1))));
  401. } else {
  402. tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (
  403. c__ * 2.);
  404. }
  405. dltlb = 0.;
  406. dltub = midpt;
  407. } else {
  408. /* (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1) */
  409. /* We choose d(i+1) as origin. */
  410. orgati = FALSE_;
  411. a = c__ * del - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1];
  412. b = z__[ip1] * z__[ip1] * del;
  413. if (a < 0.) {
  414. tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs(
  415. d__1))));
  416. } else {
  417. tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) /
  418. (c__ * 2.);
  419. }
  420. dltlb = -midpt;
  421. dltub = 0.;
  422. }
  423. if (orgati) {
  424. i__1 = *n;
  425. for (j = 1; j <= i__1; ++j) {
  426. delta[j] = d__[j] - d__[*i__] - tau;
  427. /* L130: */
  428. }
  429. } else {
  430. i__1 = *n;
  431. for (j = 1; j <= i__1; ++j) {
  432. delta[j] = d__[j] - d__[ip1] - tau;
  433. /* L140: */
  434. }
  435. }
  436. if (orgati) {
  437. ii = *i__;
  438. } else {
  439. ii = *i__ + 1;
  440. }
  441. iim1 = ii - 1;
  442. iip1 = ii + 1;
  443. /* Evaluate PSI and the derivative DPSI */
  444. dpsi = 0.;
  445. psi = 0.;
  446. erretm = 0.;
  447. i__1 = iim1;
  448. for (j = 1; j <= i__1; ++j) {
  449. temp = z__[j] / delta[j];
  450. psi += z__[j] * temp;
  451. dpsi += temp * temp;
  452. erretm += psi;
  453. /* L150: */
  454. }
  455. erretm = abs(erretm);
  456. /* Evaluate PHI and the derivative DPHI */
  457. dphi = 0.;
  458. phi = 0.;
  459. i__1 = iip1;
  460. for (j = *n; j >= i__1; --j) {
  461. temp = z__[j] / delta[j];
  462. phi += z__[j] * temp;
  463. dphi += temp * temp;
  464. erretm += phi;
  465. /* L160: */
  466. }
  467. w = rhoinv + phi + psi;
  468. /* W is the value of the secular function with */
  469. /* its ii-th element removed. */
  470. swtch3 = FALSE_;
  471. if (orgati) {
  472. if (w < 0.) {
  473. swtch3 = TRUE_;
  474. }
  475. } else {
  476. if (w > 0.) {
  477. swtch3 = TRUE_;
  478. }
  479. }
  480. if (ii == 1 || ii == *n) {
  481. swtch3 = FALSE_;
  482. }
  483. temp = z__[ii] / delta[ii];
  484. dw = dpsi + dphi + temp * temp;
  485. temp = z__[ii] * temp;
  486. w += temp;
  487. erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. +
  488. abs(tau) * dw;
  489. /* Test for convergence */
  490. if (abs(w) <= eps * erretm) {
  491. if (orgati) {
  492. *dlam = d__[*i__] + tau;
  493. } else {
  494. *dlam = d__[ip1] + tau;
  495. }
  496. goto L250;
  497. }
  498. if (w <= 0.) {
  499. dltlb = max(dltlb,tau);
  500. } else {
  501. dltub = min(dltub,tau);
  502. }
  503. /* Calculate the new step */
  504. ++niter;
  505. if (! swtch3) {
  506. if (orgati) {
  507. /* Computing 2nd power */
  508. d__1 = z__[*i__] / delta[*i__];
  509. c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (d__1 *
  510. d__1);
  511. } else {
  512. /* Computing 2nd power */
  513. d__1 = z__[ip1] / delta[ip1];
  514. c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (d__1 *
  515. d__1);
  516. }
  517. a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] *
  518. dw;
  519. b = delta[*i__] * delta[ip1] * w;
  520. if (c__ == 0.) {
  521. if (a == 0.) {
  522. if (orgati) {
  523. a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] *
  524. (dpsi + dphi);
  525. } else {
  526. a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] *
  527. (dpsi + dphi);
  528. }
  529. }
  530. eta = b / a;
  531. } else if (a <= 0.) {
  532. eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (
  533. c__ * 2.);
  534. } else {
  535. eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(
  536. d__1))));
  537. }
  538. } else {
  539. /* Interpolation using THREE most relevant poles */
  540. temp = rhoinv + psi + phi;
  541. if (orgati) {
  542. temp1 = z__[iim1] / delta[iim1];
  543. temp1 *= temp1;
  544. c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[
  545. iip1]) * temp1;
  546. zz[0] = z__[iim1] * z__[iim1];
  547. zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi);
  548. } else {
  549. temp1 = z__[iip1] / delta[iip1];
  550. temp1 *= temp1;
  551. c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[
  552. iim1]) * temp1;
  553. zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1));
  554. zz[2] = z__[iip1] * z__[iip1];
  555. }
  556. zz[1] = z__[ii] * z__[ii];
  557. _starpu_dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info);
  558. if (*info != 0) {
  559. goto L250;
  560. }
  561. }
  562. /* Note, eta should be positive if w is negative, and */
  563. /* eta should be negative otherwise. However, */
  564. /* if for some reason caused by roundoff, eta*w > 0, */
  565. /* we simply use one Newton step instead. This way */
  566. /* will guarantee eta*w < 0. */
  567. if (w * eta >= 0.) {
  568. eta = -w / dw;
  569. }
  570. temp = tau + eta;
  571. if (temp > dltub || temp < dltlb) {
  572. if (w < 0.) {
  573. eta = (dltub - tau) / 2.;
  574. } else {
  575. eta = (dltlb - tau) / 2.;
  576. }
  577. }
  578. prew = w;
  579. i__1 = *n;
  580. for (j = 1; j <= i__1; ++j) {
  581. delta[j] -= eta;
  582. /* L180: */
  583. }
  584. /* Evaluate PSI and the derivative DPSI */
  585. dpsi = 0.;
  586. psi = 0.;
  587. erretm = 0.;
  588. i__1 = iim1;
  589. for (j = 1; j <= i__1; ++j) {
  590. temp = z__[j] / delta[j];
  591. psi += z__[j] * temp;
  592. dpsi += temp * temp;
  593. erretm += psi;
  594. /* L190: */
  595. }
  596. erretm = abs(erretm);
  597. /* Evaluate PHI and the derivative DPHI */
  598. dphi = 0.;
  599. phi = 0.;
  600. i__1 = iip1;
  601. for (j = *n; j >= i__1; --j) {
  602. temp = z__[j] / delta[j];
  603. phi += z__[j] * temp;
  604. dphi += temp * temp;
  605. erretm += phi;
  606. /* L200: */
  607. }
  608. temp = z__[ii] / delta[ii];
  609. dw = dpsi + dphi + temp * temp;
  610. temp = z__[ii] * temp;
  611. w = rhoinv + phi + psi + temp;
  612. erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + (
  613. d__1 = tau + eta, abs(d__1)) * dw;
  614. swtch = FALSE_;
  615. if (orgati) {
  616. if (-w > abs(prew) / 10.) {
  617. swtch = TRUE_;
  618. }
  619. } else {
  620. if (w > abs(prew) / 10.) {
  621. swtch = TRUE_;
  622. }
  623. }
  624. tau += eta;
  625. /* Main loop to update the values of the array DELTA */
  626. iter = niter + 1;
  627. for (niter = iter; niter <= 30; ++niter) {
  628. /* Test for convergence */
  629. if (abs(w) <= eps * erretm) {
  630. if (orgati) {
  631. *dlam = d__[*i__] + tau;
  632. } else {
  633. *dlam = d__[ip1] + tau;
  634. }
  635. goto L250;
  636. }
  637. if (w <= 0.) {
  638. dltlb = max(dltlb,tau);
  639. } else {
  640. dltub = min(dltub,tau);
  641. }
  642. /* Calculate the new step */
  643. if (! swtch3) {
  644. if (! swtch) {
  645. if (orgati) {
  646. /* Computing 2nd power */
  647. d__1 = z__[*i__] / delta[*i__];
  648. c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (
  649. d__1 * d__1);
  650. } else {
  651. /* Computing 2nd power */
  652. d__1 = z__[ip1] / delta[ip1];
  653. c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) *
  654. (d__1 * d__1);
  655. }
  656. } else {
  657. temp = z__[ii] / delta[ii];
  658. if (orgati) {
  659. dpsi += temp * temp;
  660. } else {
  661. dphi += temp * temp;
  662. }
  663. c__ = w - delta[*i__] * dpsi - delta[ip1] * dphi;
  664. }
  665. a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1]
  666. * dw;
  667. b = delta[*i__] * delta[ip1] * w;
  668. if (c__ == 0.) {
  669. if (a == 0.) {
  670. if (! swtch) {
  671. if (orgati) {
  672. a = z__[*i__] * z__[*i__] + delta[ip1] *
  673. delta[ip1] * (dpsi + dphi);
  674. } else {
  675. a = z__[ip1] * z__[ip1] + delta[*i__] * delta[
  676. *i__] * (dpsi + dphi);
  677. }
  678. } else {
  679. a = delta[*i__] * delta[*i__] * dpsi + delta[ip1]
  680. * delta[ip1] * dphi;
  681. }
  682. }
  683. eta = b / a;
  684. } else if (a <= 0.) {
  685. eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))))
  686. / (c__ * 2.);
  687. } else {
  688. eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__,
  689. abs(d__1))));
  690. }
  691. } else {
  692. /* Interpolation using THREE most relevant poles */
  693. temp = rhoinv + psi + phi;
  694. if (swtch) {
  695. c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi;
  696. zz[0] = delta[iim1] * delta[iim1] * dpsi;
  697. zz[2] = delta[iip1] * delta[iip1] * dphi;
  698. } else {
  699. if (orgati) {
  700. temp1 = z__[iim1] / delta[iim1];
  701. temp1 *= temp1;
  702. c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1]
  703. - d__[iip1]) * temp1;
  704. zz[0] = z__[iim1] * z__[iim1];
  705. zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 +
  706. dphi);
  707. } else {
  708. temp1 = z__[iip1] / delta[iip1];
  709. temp1 *= temp1;
  710. c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1]
  711. - d__[iim1]) * temp1;
  712. zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi -
  713. temp1));
  714. zz[2] = z__[iip1] * z__[iip1];
  715. }
  716. }
  717. _starpu_dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta,
  718. info);
  719. if (*info != 0) {
  720. goto L250;
  721. }
  722. }
  723. /* Note, eta should be positive if w is negative, and */
  724. /* eta should be negative otherwise. However, */
  725. /* if for some reason caused by roundoff, eta*w > 0, */
  726. /* we simply use one Newton step instead. This way */
  727. /* will guarantee eta*w < 0. */
  728. if (w * eta >= 0.) {
  729. eta = -w / dw;
  730. }
  731. temp = tau + eta;
  732. if (temp > dltub || temp < dltlb) {
  733. if (w < 0.) {
  734. eta = (dltub - tau) / 2.;
  735. } else {
  736. eta = (dltlb - tau) / 2.;
  737. }
  738. }
  739. i__1 = *n;
  740. for (j = 1; j <= i__1; ++j) {
  741. delta[j] -= eta;
  742. /* L210: */
  743. }
  744. tau += eta;
  745. prew = w;
  746. /* Evaluate PSI and the derivative DPSI */
  747. dpsi = 0.;
  748. psi = 0.;
  749. erretm = 0.;
  750. i__1 = iim1;
  751. for (j = 1; j <= i__1; ++j) {
  752. temp = z__[j] / delta[j];
  753. psi += z__[j] * temp;
  754. dpsi += temp * temp;
  755. erretm += psi;
  756. /* L220: */
  757. }
  758. erretm = abs(erretm);
  759. /* Evaluate PHI and the derivative DPHI */
  760. dphi = 0.;
  761. phi = 0.;
  762. i__1 = iip1;
  763. for (j = *n; j >= i__1; --j) {
  764. temp = z__[j] / delta[j];
  765. phi += z__[j] * temp;
  766. dphi += temp * temp;
  767. erretm += phi;
  768. /* L230: */
  769. }
  770. temp = z__[ii] / delta[ii];
  771. dw = dpsi + dphi + temp * temp;
  772. temp = z__[ii] * temp;
  773. w = rhoinv + phi + psi + temp;
  774. erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3.
  775. + abs(tau) * dw;
  776. if (w * prew > 0. && abs(w) > abs(prew) / 10.) {
  777. swtch = ! swtch;
  778. }
  779. /* L240: */
  780. }
  781. /* Return with INFO = 1, NITER = MAXIT and not converged */
  782. *info = 1;
  783. if (orgati) {
  784. *dlam = d__[*i__] + tau;
  785. } else {
  786. *dlam = d__[ip1] + tau;
  787. }
  788. }
  789. L250:
  790. return 0;
  791. /* End of DLAED4 */
  792. } /* _starpu_dlaed4_ */