dlasd4.c 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011
  1. /* dlasd4.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Subroutine */ int _starpu_dlasd4_(integer *n, integer *i__, doublereal *d__,
  14. doublereal *z__, doublereal *delta, doublereal *rho, doublereal *
  15. sigma, doublereal *work, integer *info)
  16. {
  17. /* System generated locals */
  18. integer i__1;
  19. doublereal d__1;
  20. /* Builtin functions */
  21. double sqrt(doublereal);
  22. /* Local variables */
  23. doublereal a, b, c__;
  24. integer j;
  25. doublereal w, dd[3];
  26. integer ii;
  27. doublereal dw, zz[3];
  28. integer ip1;
  29. doublereal eta, phi, eps, tau, psi;
  30. integer iim1, iip1;
  31. doublereal dphi, dpsi;
  32. integer iter;
  33. doublereal temp, prew, sg2lb, sg2ub, temp1, temp2, dtiim, delsq, dtiip;
  34. integer niter;
  35. doublereal dtisq;
  36. logical swtch;
  37. doublereal dtnsq;
  38. extern /* Subroutine */ int _starpu_dlaed6_(integer *, logical *, doublereal *,
  39. doublereal *, doublereal *, doublereal *, doublereal *, integer *)
  40. , _starpu_dlasd5_(integer *, doublereal *, doublereal *, doublereal *,
  41. doublereal *, doublereal *, doublereal *);
  42. doublereal delsq2, dtnsq1;
  43. logical swtch3;
  44. extern doublereal _starpu_dlamch_(char *);
  45. logical orgati;
  46. doublereal erretm, dtipsq, rhoinv;
  47. /* -- LAPACK auxiliary routine (version 3.2) -- */
  48. /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  49. /* November 2006 */
  50. /* .. Scalar Arguments .. */
  51. /* .. */
  52. /* .. Array Arguments .. */
  53. /* .. */
  54. /* Purpose */
  55. /* ======= */
  56. /* This subroutine computes the square root of the I-th updated */
  57. /* eigenvalue of a positive symmetric rank-one modification to */
  58. /* a positive diagonal matrix whose entries are given as the squares */
  59. /* of the corresponding entries in the array d, and that */
  60. /* 0 <= D(i) < D(j) for i < j */
  61. /* and that RHO > 0. This is arranged by the calling routine, and is */
  62. /* no loss in generality. The rank-one modified system is thus */
  63. /* diag( D ) * diag( D ) + RHO * Z * Z_transpose. */
  64. /* where we assume the Euclidean norm of Z is 1. */
  65. /* The method consists of approximating the rational functions in the */
  66. /* secular equation by simpler interpolating rational functions. */
  67. /* Arguments */
  68. /* ========= */
  69. /* N (input) INTEGER */
  70. /* The length of all arrays. */
  71. /* I (input) INTEGER */
  72. /* The index of the eigenvalue to be computed. 1 <= I <= N. */
  73. /* D (input) DOUBLE PRECISION array, dimension ( N ) */
  74. /* The original eigenvalues. It is assumed that they are in */
  75. /* order, 0 <= D(I) < D(J) for I < J. */
  76. /* Z (input) DOUBLE PRECISION array, dimension ( N ) */
  77. /* The components of the updating vector. */
  78. /* DELTA (output) DOUBLE PRECISION array, dimension ( N ) */
  79. /* If N .ne. 1, DELTA contains (D(j) - sigma_I) in its j-th */
  80. /* component. If N = 1, then DELTA(1) = 1. The vector DELTA */
  81. /* contains the information necessary to construct the */
  82. /* (singular) eigenvectors. */
  83. /* RHO (input) DOUBLE PRECISION */
  84. /* The scalar in the symmetric updating formula. */
  85. /* SIGMA (output) DOUBLE PRECISION */
  86. /* The computed sigma_I, the I-th updated eigenvalue. */
  87. /* WORK (workspace) DOUBLE PRECISION array, dimension ( N ) */
  88. /* If N .ne. 1, WORK contains (D(j) + sigma_I) in its j-th */
  89. /* component. If N = 1, then WORK( 1 ) = 1. */
  90. /* INFO (output) INTEGER */
  91. /* = 0: successful exit */
  92. /* > 0: if INFO = 1, the updating process failed. */
  93. /* Internal Parameters */
  94. /* =================== */
  95. /* Logical variable ORGATI (origin-at-i?) is used for distinguishing */
  96. /* whether D(i) or D(i+1) is treated as the origin. */
  97. /* ORGATI = .true. origin at i */
  98. /* ORGATI = .false. origin at i+1 */
  99. /* Logical variable SWTCH3 (switch-for-3-poles?) is for noting */
  100. /* if we are working with THREE poles! */
  101. /* MAXIT is the maximum number of iterations allowed for each */
  102. /* eigenvalue. */
  103. /* Further Details */
  104. /* =============== */
  105. /* Based on contributions by */
  106. /* Ren-Cang Li, Computer Science Division, University of California */
  107. /* at Berkeley, USA */
  108. /* ===================================================================== */
  109. /* .. Parameters .. */
  110. /* .. */
  111. /* .. Local Scalars .. */
  112. /* .. */
  113. /* .. Local Arrays .. */
  114. /* .. */
  115. /* .. External Subroutines .. */
  116. /* .. */
  117. /* .. External Functions .. */
  118. /* .. */
  119. /* .. Intrinsic Functions .. */
  120. /* .. */
  121. /* .. Executable Statements .. */
  122. /* Since this routine is called in an inner loop, we do no argument */
  123. /* checking. */
  124. /* Quick return for N=1 and 2. */
  125. /* Parameter adjustments */
  126. --work;
  127. --delta;
  128. --z__;
  129. --d__;
  130. /* Function Body */
  131. *info = 0;
  132. if (*n == 1) {
  133. /* Presumably, I=1 upon entry */
  134. *sigma = sqrt(d__[1] * d__[1] + *rho * z__[1] * z__[1]);
  135. delta[1] = 1.;
  136. work[1] = 1.;
  137. return 0;
  138. }
  139. if (*n == 2) {
  140. _starpu_dlasd5_(i__, &d__[1], &z__[1], &delta[1], rho, sigma, &work[1]);
  141. return 0;
  142. }
  143. /* Compute machine epsilon */
  144. eps = _starpu_dlamch_("Epsilon");
  145. rhoinv = 1. / *rho;
  146. /* The case I = N */
  147. if (*i__ == *n) {
  148. /* Initialize some basic variables */
  149. ii = *n - 1;
  150. niter = 1;
  151. /* Calculate initial guess */
  152. temp = *rho / 2.;
  153. /* If ||Z||_2 is not one, then TEMP should be set to */
  154. /* RHO * ||Z||_2^2 / TWO */
  155. temp1 = temp / (d__[*n] + sqrt(d__[*n] * d__[*n] + temp));
  156. i__1 = *n;
  157. for (j = 1; j <= i__1; ++j) {
  158. work[j] = d__[j] + d__[*n] + temp1;
  159. delta[j] = d__[j] - d__[*n] - temp1;
  160. /* L10: */
  161. }
  162. psi = 0.;
  163. i__1 = *n - 2;
  164. for (j = 1; j <= i__1; ++j) {
  165. psi += z__[j] * z__[j] / (delta[j] * work[j]);
  166. /* L20: */
  167. }
  168. c__ = rhoinv + psi;
  169. w = c__ + z__[ii] * z__[ii] / (delta[ii] * work[ii]) + z__[*n] * z__[*
  170. n] / (delta[*n] * work[*n]);
  171. if (w <= 0.) {
  172. temp1 = sqrt(d__[*n] * d__[*n] + *rho);
  173. temp = z__[*n - 1] * z__[*n - 1] / ((d__[*n - 1] + temp1) * (d__[*
  174. n] - d__[*n - 1] + *rho / (d__[*n] + temp1))) + z__[*n] *
  175. z__[*n] / *rho;
  176. /* The following TAU is to approximate */
  177. /* SIGMA_n^2 - D( N )*D( N ) */
  178. if (c__ <= temp) {
  179. tau = *rho;
  180. } else {
  181. delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]);
  182. a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*
  183. n];
  184. b = z__[*n] * z__[*n] * delsq;
  185. if (a < 0.) {
  186. tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a);
  187. } else {
  188. tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.);
  189. }
  190. }
  191. /* It can be proved that */
  192. /* D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU <= D(N)^2+RHO */
  193. } else {
  194. delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]);
  195. a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n];
  196. b = z__[*n] * z__[*n] * delsq;
  197. /* The following TAU is to approximate */
  198. /* SIGMA_n^2 - D( N )*D( N ) */
  199. if (a < 0.) {
  200. tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a);
  201. } else {
  202. tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.);
  203. }
  204. /* It can be proved that */
  205. /* D(N)^2 < D(N)^2+TAU < SIGMA(N)^2 < D(N)^2+RHO/2 */
  206. }
  207. /* The following ETA is to approximate SIGMA_n - D( N ) */
  208. eta = tau / (d__[*n] + sqrt(d__[*n] * d__[*n] + tau));
  209. *sigma = d__[*n] + eta;
  210. i__1 = *n;
  211. for (j = 1; j <= i__1; ++j) {
  212. delta[j] = d__[j] - d__[*i__] - eta;
  213. work[j] = d__[j] + d__[*i__] + eta;
  214. /* L30: */
  215. }
  216. /* Evaluate PSI and the derivative DPSI */
  217. dpsi = 0.;
  218. psi = 0.;
  219. erretm = 0.;
  220. i__1 = ii;
  221. for (j = 1; j <= i__1; ++j) {
  222. temp = z__[j] / (delta[j] * work[j]);
  223. psi += z__[j] * temp;
  224. dpsi += temp * temp;
  225. erretm += psi;
  226. /* L40: */
  227. }
  228. erretm = abs(erretm);
  229. /* Evaluate PHI and the derivative DPHI */
  230. temp = z__[*n] / (delta[*n] * work[*n]);
  231. phi = z__[*n] * temp;
  232. dphi = temp * temp;
  233. erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi
  234. + dphi);
  235. w = rhoinv + phi + psi;
  236. /* Test for convergence */
  237. if (abs(w) <= eps * erretm) {
  238. goto L240;
  239. }
  240. /* Calculate the new step */
  241. ++niter;
  242. dtnsq1 = work[*n - 1] * delta[*n - 1];
  243. dtnsq = work[*n] * delta[*n];
  244. c__ = w - dtnsq1 * dpsi - dtnsq * dphi;
  245. a = (dtnsq + dtnsq1) * w - dtnsq * dtnsq1 * (dpsi + dphi);
  246. b = dtnsq * dtnsq1 * w;
  247. if (c__ < 0.) {
  248. c__ = abs(c__);
  249. }
  250. if (c__ == 0.) {
  251. eta = *rho - *sigma * *sigma;
  252. } else if (a >= 0.) {
  253. eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__
  254. * 2.);
  255. } else {
  256. eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))
  257. );
  258. }
  259. /* Note, eta should be positive if w is negative, and */
  260. /* eta should be negative otherwise. However, */
  261. /* if for some reason caused by roundoff, eta*w > 0, */
  262. /* we simply use one Newton step instead. This way */
  263. /* will guarantee eta*w < 0. */
  264. if (w * eta > 0.) {
  265. eta = -w / (dpsi + dphi);
  266. }
  267. temp = eta - dtnsq;
  268. if (temp > *rho) {
  269. eta = *rho + dtnsq;
  270. }
  271. tau += eta;
  272. eta /= *sigma + sqrt(eta + *sigma * *sigma);
  273. i__1 = *n;
  274. for (j = 1; j <= i__1; ++j) {
  275. delta[j] -= eta;
  276. work[j] += eta;
  277. /* L50: */
  278. }
  279. *sigma += eta;
  280. /* Evaluate PSI and the derivative DPSI */
  281. dpsi = 0.;
  282. psi = 0.;
  283. erretm = 0.;
  284. i__1 = ii;
  285. for (j = 1; j <= i__1; ++j) {
  286. temp = z__[j] / (work[j] * delta[j]);
  287. psi += z__[j] * temp;
  288. dpsi += temp * temp;
  289. erretm += psi;
  290. /* L60: */
  291. }
  292. erretm = abs(erretm);
  293. /* Evaluate PHI and the derivative DPHI */
  294. temp = z__[*n] / (work[*n] * delta[*n]);
  295. phi = z__[*n] * temp;
  296. dphi = temp * temp;
  297. erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi
  298. + dphi);
  299. w = rhoinv + phi + psi;
  300. /* Main loop to update the values of the array DELTA */
  301. iter = niter + 1;
  302. for (niter = iter; niter <= 20; ++niter) {
  303. /* Test for convergence */
  304. if (abs(w) <= eps * erretm) {
  305. goto L240;
  306. }
  307. /* Calculate the new step */
  308. dtnsq1 = work[*n - 1] * delta[*n - 1];
  309. dtnsq = work[*n] * delta[*n];
  310. c__ = w - dtnsq1 * dpsi - dtnsq * dphi;
  311. a = (dtnsq + dtnsq1) * w - dtnsq1 * dtnsq * (dpsi + dphi);
  312. b = dtnsq1 * dtnsq * w;
  313. if (a >= 0.) {
  314. eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (
  315. c__ * 2.);
  316. } else {
  317. eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(
  318. d__1))));
  319. }
  320. /* Note, eta should be positive if w is negative, and */
  321. /* eta should be negative otherwise. However, */
  322. /* if for some reason caused by roundoff, eta*w > 0, */
  323. /* we simply use one Newton step instead. This way */
  324. /* will guarantee eta*w < 0. */
  325. if (w * eta > 0.) {
  326. eta = -w / (dpsi + dphi);
  327. }
  328. temp = eta - dtnsq;
  329. if (temp <= 0.) {
  330. eta /= 2.;
  331. }
  332. tau += eta;
  333. eta /= *sigma + sqrt(eta + *sigma * *sigma);
  334. i__1 = *n;
  335. for (j = 1; j <= i__1; ++j) {
  336. delta[j] -= eta;
  337. work[j] += eta;
  338. /* L70: */
  339. }
  340. *sigma += eta;
  341. /* Evaluate PSI and the derivative DPSI */
  342. dpsi = 0.;
  343. psi = 0.;
  344. erretm = 0.;
  345. i__1 = ii;
  346. for (j = 1; j <= i__1; ++j) {
  347. temp = z__[j] / (work[j] * delta[j]);
  348. psi += z__[j] * temp;
  349. dpsi += temp * temp;
  350. erretm += psi;
  351. /* L80: */
  352. }
  353. erretm = abs(erretm);
  354. /* Evaluate PHI and the derivative DPHI */
  355. temp = z__[*n] / (work[*n] * delta[*n]);
  356. phi = z__[*n] * temp;
  357. dphi = temp * temp;
  358. erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (
  359. dpsi + dphi);
  360. w = rhoinv + phi + psi;
  361. /* L90: */
  362. }
  363. /* Return with INFO = 1, NITER = MAXIT and not converged */
  364. *info = 1;
  365. goto L240;
  366. /* End for the case I = N */
  367. } else {
  368. /* The case for I < N */
  369. niter = 1;
  370. ip1 = *i__ + 1;
  371. /* Calculate initial guess */
  372. delsq = (d__[ip1] - d__[*i__]) * (d__[ip1] + d__[*i__]);
  373. delsq2 = delsq / 2.;
  374. temp = delsq2 / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + delsq2));
  375. i__1 = *n;
  376. for (j = 1; j <= i__1; ++j) {
  377. work[j] = d__[j] + d__[*i__] + temp;
  378. delta[j] = d__[j] - d__[*i__] - temp;
  379. /* L100: */
  380. }
  381. psi = 0.;
  382. i__1 = *i__ - 1;
  383. for (j = 1; j <= i__1; ++j) {
  384. psi += z__[j] * z__[j] / (work[j] * delta[j]);
  385. /* L110: */
  386. }
  387. phi = 0.;
  388. i__1 = *i__ + 2;
  389. for (j = *n; j >= i__1; --j) {
  390. phi += z__[j] * z__[j] / (work[j] * delta[j]);
  391. /* L120: */
  392. }
  393. c__ = rhoinv + psi + phi;
  394. w = c__ + z__[*i__] * z__[*i__] / (work[*i__] * delta[*i__]) + z__[
  395. ip1] * z__[ip1] / (work[ip1] * delta[ip1]);
  396. if (w > 0.) {
  397. /* d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 */
  398. /* We choose d(i) as origin. */
  399. orgati = TRUE_;
  400. sg2lb = 0.;
  401. sg2ub = delsq2;
  402. a = c__ * delsq + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1];
  403. b = z__[*i__] * z__[*i__] * delsq;
  404. if (a > 0.) {
  405. tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(
  406. d__1))));
  407. } else {
  408. tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (
  409. c__ * 2.);
  410. }
  411. /* TAU now is an estimation of SIGMA^2 - D( I )^2. The */
  412. /* following, however, is the corresponding estimation of */
  413. /* SIGMA - D( I ). */
  414. eta = tau / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + tau));
  415. } else {
  416. /* (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2 */
  417. /* We choose d(i+1) as origin. */
  418. orgati = FALSE_;
  419. sg2lb = -delsq2;
  420. sg2ub = 0.;
  421. a = c__ * delsq - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1];
  422. b = z__[ip1] * z__[ip1] * delsq;
  423. if (a < 0.) {
  424. tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs(
  425. d__1))));
  426. } else {
  427. tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) /
  428. (c__ * 2.);
  429. }
  430. /* TAU now is an estimation of SIGMA^2 - D( IP1 )^2. The */
  431. /* following, however, is the corresponding estimation of */
  432. /* SIGMA - D( IP1 ). */
  433. eta = tau / (d__[ip1] + sqrt((d__1 = d__[ip1] * d__[ip1] + tau,
  434. abs(d__1))));
  435. }
  436. if (orgati) {
  437. ii = *i__;
  438. *sigma = d__[*i__] + eta;
  439. i__1 = *n;
  440. for (j = 1; j <= i__1; ++j) {
  441. work[j] = d__[j] + d__[*i__] + eta;
  442. delta[j] = d__[j] - d__[*i__] - eta;
  443. /* L130: */
  444. }
  445. } else {
  446. ii = *i__ + 1;
  447. *sigma = d__[ip1] + eta;
  448. i__1 = *n;
  449. for (j = 1; j <= i__1; ++j) {
  450. work[j] = d__[j] + d__[ip1] + eta;
  451. delta[j] = d__[j] - d__[ip1] - eta;
  452. /* L140: */
  453. }
  454. }
  455. iim1 = ii - 1;
  456. iip1 = ii + 1;
  457. /* Evaluate PSI and the derivative DPSI */
  458. dpsi = 0.;
  459. psi = 0.;
  460. erretm = 0.;
  461. i__1 = iim1;
  462. for (j = 1; j <= i__1; ++j) {
  463. temp = z__[j] / (work[j] * delta[j]);
  464. psi += z__[j] * temp;
  465. dpsi += temp * temp;
  466. erretm += psi;
  467. /* L150: */
  468. }
  469. erretm = abs(erretm);
  470. /* Evaluate PHI and the derivative DPHI */
  471. dphi = 0.;
  472. phi = 0.;
  473. i__1 = iip1;
  474. for (j = *n; j >= i__1; --j) {
  475. temp = z__[j] / (work[j] * delta[j]);
  476. phi += z__[j] * temp;
  477. dphi += temp * temp;
  478. erretm += phi;
  479. /* L160: */
  480. }
  481. w = rhoinv + phi + psi;
  482. /* W is the value of the secular function with */
  483. /* its ii-th element removed. */
  484. swtch3 = FALSE_;
  485. if (orgati) {
  486. if (w < 0.) {
  487. swtch3 = TRUE_;
  488. }
  489. } else {
  490. if (w > 0.) {
  491. swtch3 = TRUE_;
  492. }
  493. }
  494. if (ii == 1 || ii == *n) {
  495. swtch3 = FALSE_;
  496. }
  497. temp = z__[ii] / (work[ii] * delta[ii]);
  498. dw = dpsi + dphi + temp * temp;
  499. temp = z__[ii] * temp;
  500. w += temp;
  501. erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. +
  502. abs(tau) * dw;
  503. /* Test for convergence */
  504. if (abs(w) <= eps * erretm) {
  505. goto L240;
  506. }
  507. if (w <= 0.) {
  508. sg2lb = max(sg2lb,tau);
  509. } else {
  510. sg2ub = min(sg2ub,tau);
  511. }
  512. /* Calculate the new step */
  513. ++niter;
  514. if (! swtch3) {
  515. dtipsq = work[ip1] * delta[ip1];
  516. dtisq = work[*i__] * delta[*i__];
  517. if (orgati) {
  518. /* Computing 2nd power */
  519. d__1 = z__[*i__] / dtisq;
  520. c__ = w - dtipsq * dw + delsq * (d__1 * d__1);
  521. } else {
  522. /* Computing 2nd power */
  523. d__1 = z__[ip1] / dtipsq;
  524. c__ = w - dtisq * dw - delsq * (d__1 * d__1);
  525. }
  526. a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw;
  527. b = dtipsq * dtisq * w;
  528. if (c__ == 0.) {
  529. if (a == 0.) {
  530. if (orgati) {
  531. a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * (dpsi +
  532. dphi);
  533. } else {
  534. a = z__[ip1] * z__[ip1] + dtisq * dtisq * (dpsi +
  535. dphi);
  536. }
  537. }
  538. eta = b / a;
  539. } else if (a <= 0.) {
  540. eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (
  541. c__ * 2.);
  542. } else {
  543. eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(
  544. d__1))));
  545. }
  546. } else {
  547. /* Interpolation using THREE most relevant poles */
  548. dtiim = work[iim1] * delta[iim1];
  549. dtiip = work[iip1] * delta[iip1];
  550. temp = rhoinv + psi + phi;
  551. if (orgati) {
  552. temp1 = z__[iim1] / dtiim;
  553. temp1 *= temp1;
  554. c__ = temp - dtiip * (dpsi + dphi) - (d__[iim1] - d__[iip1]) *
  555. (d__[iim1] + d__[iip1]) * temp1;
  556. zz[0] = z__[iim1] * z__[iim1];
  557. if (dpsi < temp1) {
  558. zz[2] = dtiip * dtiip * dphi;
  559. } else {
  560. zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi);
  561. }
  562. } else {
  563. temp1 = z__[iip1] / dtiip;
  564. temp1 *= temp1;
  565. c__ = temp - dtiim * (dpsi + dphi) - (d__[iip1] - d__[iim1]) *
  566. (d__[iim1] + d__[iip1]) * temp1;
  567. if (dphi < temp1) {
  568. zz[0] = dtiim * dtiim * dpsi;
  569. } else {
  570. zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1));
  571. }
  572. zz[2] = z__[iip1] * z__[iip1];
  573. }
  574. zz[1] = z__[ii] * z__[ii];
  575. dd[0] = dtiim;
  576. dd[1] = delta[ii] * work[ii];
  577. dd[2] = dtiip;
  578. _starpu_dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info);
  579. if (*info != 0) {
  580. goto L240;
  581. }
  582. }
  583. /* Note, eta should be positive if w is negative, and */
  584. /* eta should be negative otherwise. However, */
  585. /* if for some reason caused by roundoff, eta*w > 0, */
  586. /* we simply use one Newton step instead. This way */
  587. /* will guarantee eta*w < 0. */
  588. if (w * eta >= 0.) {
  589. eta = -w / dw;
  590. }
  591. if (orgati) {
  592. temp1 = work[*i__] * delta[*i__];
  593. temp = eta - temp1;
  594. } else {
  595. temp1 = work[ip1] * delta[ip1];
  596. temp = eta - temp1;
  597. }
  598. if (temp > sg2ub || temp < sg2lb) {
  599. if (w < 0.) {
  600. eta = (sg2ub - tau) / 2.;
  601. } else {
  602. eta = (sg2lb - tau) / 2.;
  603. }
  604. }
  605. tau += eta;
  606. eta /= *sigma + sqrt(*sigma * *sigma + eta);
  607. prew = w;
  608. *sigma += eta;
  609. i__1 = *n;
  610. for (j = 1; j <= i__1; ++j) {
  611. work[j] += eta;
  612. delta[j] -= eta;
  613. /* L170: */
  614. }
  615. /* Evaluate PSI and the derivative DPSI */
  616. dpsi = 0.;
  617. psi = 0.;
  618. erretm = 0.;
  619. i__1 = iim1;
  620. for (j = 1; j <= i__1; ++j) {
  621. temp = z__[j] / (work[j] * delta[j]);
  622. psi += z__[j] * temp;
  623. dpsi += temp * temp;
  624. erretm += psi;
  625. /* L180: */
  626. }
  627. erretm = abs(erretm);
  628. /* Evaluate PHI and the derivative DPHI */
  629. dphi = 0.;
  630. phi = 0.;
  631. i__1 = iip1;
  632. for (j = *n; j >= i__1; --j) {
  633. temp = z__[j] / (work[j] * delta[j]);
  634. phi += z__[j] * temp;
  635. dphi += temp * temp;
  636. erretm += phi;
  637. /* L190: */
  638. }
  639. temp = z__[ii] / (work[ii] * delta[ii]);
  640. dw = dpsi + dphi + temp * temp;
  641. temp = z__[ii] * temp;
  642. w = rhoinv + phi + psi + temp;
  643. erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. +
  644. abs(tau) * dw;
  645. if (w <= 0.) {
  646. sg2lb = max(sg2lb,tau);
  647. } else {
  648. sg2ub = min(sg2ub,tau);
  649. }
  650. swtch = FALSE_;
  651. if (orgati) {
  652. if (-w > abs(prew) / 10.) {
  653. swtch = TRUE_;
  654. }
  655. } else {
  656. if (w > abs(prew) / 10.) {
  657. swtch = TRUE_;
  658. }
  659. }
  660. /* Main loop to update the values of the array DELTA and WORK */
  661. iter = niter + 1;
  662. for (niter = iter; niter <= 20; ++niter) {
  663. /* Test for convergence */
  664. if (abs(w) <= eps * erretm) {
  665. goto L240;
  666. }
  667. /* Calculate the new step */
  668. if (! swtch3) {
  669. dtipsq = work[ip1] * delta[ip1];
  670. dtisq = work[*i__] * delta[*i__];
  671. if (! swtch) {
  672. if (orgati) {
  673. /* Computing 2nd power */
  674. d__1 = z__[*i__] / dtisq;
  675. c__ = w - dtipsq * dw + delsq * (d__1 * d__1);
  676. } else {
  677. /* Computing 2nd power */
  678. d__1 = z__[ip1] / dtipsq;
  679. c__ = w - dtisq * dw - delsq * (d__1 * d__1);
  680. }
  681. } else {
  682. temp = z__[ii] / (work[ii] * delta[ii]);
  683. if (orgati) {
  684. dpsi += temp * temp;
  685. } else {
  686. dphi += temp * temp;
  687. }
  688. c__ = w - dtisq * dpsi - dtipsq * dphi;
  689. }
  690. a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw;
  691. b = dtipsq * dtisq * w;
  692. if (c__ == 0.) {
  693. if (a == 0.) {
  694. if (! swtch) {
  695. if (orgati) {
  696. a = z__[*i__] * z__[*i__] + dtipsq * dtipsq *
  697. (dpsi + dphi);
  698. } else {
  699. a = z__[ip1] * z__[ip1] + dtisq * dtisq * (
  700. dpsi + dphi);
  701. }
  702. } else {
  703. a = dtisq * dtisq * dpsi + dtipsq * dtipsq * dphi;
  704. }
  705. }
  706. eta = b / a;
  707. } else if (a <= 0.) {
  708. eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))))
  709. / (c__ * 2.);
  710. } else {
  711. eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__,
  712. abs(d__1))));
  713. }
  714. } else {
  715. /* Interpolation using THREE most relevant poles */
  716. dtiim = work[iim1] * delta[iim1];
  717. dtiip = work[iip1] * delta[iip1];
  718. temp = rhoinv + psi + phi;
  719. if (swtch) {
  720. c__ = temp - dtiim * dpsi - dtiip * dphi;
  721. zz[0] = dtiim * dtiim * dpsi;
  722. zz[2] = dtiip * dtiip * dphi;
  723. } else {
  724. if (orgati) {
  725. temp1 = z__[iim1] / dtiim;
  726. temp1 *= temp1;
  727. temp2 = (d__[iim1] - d__[iip1]) * (d__[iim1] + d__[
  728. iip1]) * temp1;
  729. c__ = temp - dtiip * (dpsi + dphi) - temp2;
  730. zz[0] = z__[iim1] * z__[iim1];
  731. if (dpsi < temp1) {
  732. zz[2] = dtiip * dtiip * dphi;
  733. } else {
  734. zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi);
  735. }
  736. } else {
  737. temp1 = z__[iip1] / dtiip;
  738. temp1 *= temp1;
  739. temp2 = (d__[iip1] - d__[iim1]) * (d__[iim1] + d__[
  740. iip1]) * temp1;
  741. c__ = temp - dtiim * (dpsi + dphi) - temp2;
  742. if (dphi < temp1) {
  743. zz[0] = dtiim * dtiim * dpsi;
  744. } else {
  745. zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1));
  746. }
  747. zz[2] = z__[iip1] * z__[iip1];
  748. }
  749. }
  750. dd[0] = dtiim;
  751. dd[1] = delta[ii] * work[ii];
  752. dd[2] = dtiip;
  753. _starpu_dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info);
  754. if (*info != 0) {
  755. goto L240;
  756. }
  757. }
  758. /* Note, eta should be positive if w is negative, and */
  759. /* eta should be negative otherwise. However, */
  760. /* if for some reason caused by roundoff, eta*w > 0, */
  761. /* we simply use one Newton step instead. This way */
  762. /* will guarantee eta*w < 0. */
  763. if (w * eta >= 0.) {
  764. eta = -w / dw;
  765. }
  766. if (orgati) {
  767. temp1 = work[*i__] * delta[*i__];
  768. temp = eta - temp1;
  769. } else {
  770. temp1 = work[ip1] * delta[ip1];
  771. temp = eta - temp1;
  772. }
  773. if (temp > sg2ub || temp < sg2lb) {
  774. if (w < 0.) {
  775. eta = (sg2ub - tau) / 2.;
  776. } else {
  777. eta = (sg2lb - tau) / 2.;
  778. }
  779. }
  780. tau += eta;
  781. eta /= *sigma + sqrt(*sigma * *sigma + eta);
  782. *sigma += eta;
  783. i__1 = *n;
  784. for (j = 1; j <= i__1; ++j) {
  785. work[j] += eta;
  786. delta[j] -= eta;
  787. /* L200: */
  788. }
  789. prew = w;
  790. /* Evaluate PSI and the derivative DPSI */
  791. dpsi = 0.;
  792. psi = 0.;
  793. erretm = 0.;
  794. i__1 = iim1;
  795. for (j = 1; j <= i__1; ++j) {
  796. temp = z__[j] / (work[j] * delta[j]);
  797. psi += z__[j] * temp;
  798. dpsi += temp * temp;
  799. erretm += psi;
  800. /* L210: */
  801. }
  802. erretm = abs(erretm);
  803. /* Evaluate PHI and the derivative DPHI */
  804. dphi = 0.;
  805. phi = 0.;
  806. i__1 = iip1;
  807. for (j = *n; j >= i__1; --j) {
  808. temp = z__[j] / (work[j] * delta[j]);
  809. phi += z__[j] * temp;
  810. dphi += temp * temp;
  811. erretm += phi;
  812. /* L220: */
  813. }
  814. temp = z__[ii] / (work[ii] * delta[ii]);
  815. dw = dpsi + dphi + temp * temp;
  816. temp = z__[ii] * temp;
  817. w = rhoinv + phi + psi + temp;
  818. erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3.
  819. + abs(tau) * dw;
  820. if (w * prew > 0. && abs(w) > abs(prew) / 10.) {
  821. swtch = ! swtch;
  822. }
  823. if (w <= 0.) {
  824. sg2lb = max(sg2lb,tau);
  825. } else {
  826. sg2ub = min(sg2ub,tau);
  827. }
  828. /* L230: */
  829. }
  830. /* Return with INFO = 1, NITER = MAXIT and not converged */
  831. *info = 1;
  832. }
  833. L240:
  834. return 0;
  835. /* End of DLASD4 */
  836. } /* _starpu_dlasd4_ */