dlarrf.c

/* dlarrf.f -- translated by f2c (version 20061008).
   You must link the resulting object file with libf2c:
   on Microsoft Windows system, link with libf2c.lib;
   on Linux or Unix systems, link with .../path/to/libf2c.a -lm
   or, if you install libf2c.a in a standard place, with -lf2c -lm
   -- in that order, at the end of the command line, as in
	cc *.o -lf2c -lm
   Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
	http://www.netlib.org/f2c/libf2c.zip
*/
#include "f2c.h"
#include "blaswrap.h"

/* Table of constant values */

static integer c__1 = 1;
/* Subroutine */ int _starpu_dlarrf_(integer *n, doublereal *d__, doublereal *l,
	doublereal *ld, integer *clstrt, integer *clend, doublereal *w,
	doublereal *wgap, doublereal *werr, doublereal *spdiam, doublereal *
	clgapl, doublereal *clgapr, doublereal *pivmin, doublereal *sigma,
	doublereal *dplus, doublereal *lplus, doublereal *work, integer *info)
{
    /* System generated locals */
    integer i__1;
    doublereal d__1, d__2, d__3;

    /* Builtin functions */
    double sqrt(doublereal);

    /* Local variables */
    integer i__;
    doublereal s, bestshift, smlgrowth, eps, tmp, max1, max2, rrr1, rrr2,
	    znm2, growthbound, fail, fact, oldp;
    integer indx;
    doublereal prod;
    integer ktry;
    doublereal fail2, avgap, ldmax, rdmax;
    integer shift;
    extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *,
	    doublereal *, integer *);
    logical dorrr1;
    extern doublereal _starpu_dlamch_(char *);
    doublereal ldelta;
    logical nofail;
    doublereal mingap, lsigma, rdelta;
    extern logical _starpu_disnan_(doublereal *);
    logical forcer;
    doublereal rsigma, clwdth;
    logical sawnan1, sawnan2, tryrrr1;
/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  Given the initial representation L D L^T and its cluster of close */
/*  eigenvalues (in a relative measure), W( CLSTRT ), W( CLSTRT+1 ), ... */
/*  W( CLEND ), DLARRF finds a new relatively robust representation */
/*  L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the */
/*  eigenvalues of L(+) D(+) L(+)^T is relatively isolated. */

/*  Arguments */
/*  ========= */

/*  N       (input) INTEGER */
/*          The order of the matrix (subblock, if the matrix is split). */

/*  D       (input) DOUBLE PRECISION array, dimension (N) */
/*          The N diagonal elements of the diagonal matrix D. */

/*  L       (input) DOUBLE PRECISION array, dimension (N-1) */
/*          The (N-1) subdiagonal elements of the unit bidiagonal */
/*          matrix L. */

/*  LD      (input) DOUBLE PRECISION array, dimension (N-1) */
/*          The (N-1) elements L(i)*D(i). */

/*  CLSTRT  (input) INTEGER */
/*          The index of the first eigenvalue in the cluster. */

/*  CLEND   (input) INTEGER */
/*          The index of the last eigenvalue in the cluster. */

/*  W       (input) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */
/*          The eigenvalue APPROXIMATIONS of L D L^T in ascending order. */
/*          W( CLSTRT ) through W( CLEND ) form the cluster of relatively */
/*          close eigenvalues. */

/*  WGAP    (input/output) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */
/*          The separation from the right neighbor eigenvalue in W. */

/*  WERR    (input) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */
/*          WERR contains the semiwidth of the uncertainty interval of the */
/*          corresponding eigenvalue APPROXIMATION in W. */

/*  SPDIAM  (input) DOUBLE PRECISION */
/*          Estimate of the spectral diameter obtained from the */
/*          Gerschgorin intervals. */

/*  CLGAPL, CLGAPR (input) DOUBLE PRECISION */
/*          Absolute gap on each end of the cluster. */
/*          Set by the calling routine to protect against shifts too close */
/*          to eigenvalues outside the cluster. */

/*  PIVMIN  (input) DOUBLE PRECISION */
/*          The minimum pivot allowed in the Sturm sequence. */

/*  SIGMA   (output) DOUBLE PRECISION */
/*          The shift used to form L(+) D(+) L(+)^T. */

/*  DPLUS   (output) DOUBLE PRECISION array, dimension (N) */
/*          The N diagonal elements of the diagonal matrix D(+). */

/*  LPLUS   (output) DOUBLE PRECISION array, dimension (N-1) */
/*          The first (N-1) elements of LPLUS contain the subdiagonal */
/*          elements of the unit bidiagonal matrix L(+). */

/*  WORK    (workspace) DOUBLE PRECISION array, dimension (2*N) */
/*          Workspace. */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*     Beresford Parlett, University of California, Berkeley, USA */
/*     Jim Demmel, University of California, Berkeley, USA */
/*     Inderjit Dhillon, University of Texas, Austin, USA */
/*     Osni Marques, LBNL/NERSC, USA */
/*     Christof Voemel, University of California, Berkeley, USA */

/*  ===================================================================== */
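/*  Illustrative caller sketch (hypothetical values, not part of LAPACK; */
/*  in the library this routine is typically driven by DLARRV). All */
/*  arguments follow the f2c convention of being passed by reference. */
/*  For a cluster W(1:2) of a 3x3 representation a call could look like: */

/*      integer n = 3, clstrt = 1, clend = 2, info; */
/*      doublereal d[3], l[2], ld[2];                   (input L D L^T)      */
/*      doublereal w[2], wgap[2], werr[2];              (cluster data)       */
/*      doublereal spdiam, clgapl, clgapr, pivmin;      (gaps, pivot bound)  */
/*      doublereal sigma, dplus[3], lplus[2], work[6];  (outputs, workspace) */
/*      _starpu_dlarrf_(&n, d, l, ld, &clstrt, &clend, w, wgap, werr, */
/*              &spdiam, &clgapl, &clgapr, &pivmin, &sigma, dplus, lplus, */
/*              work, &info); */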
/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */
    /* Parameter adjustments */
    --work;
    --lplus;
    --dplus;
    --werr;
    --wgap;
    --w;
    --ld;
    --l;
    --d__;

    /* Function Body */
    *info = 0;
    fact = 2.;
    eps = _starpu_dlamch_("Precision");
    shift = 0;
    forcer = FALSE_;

/*     Note that we cannot guarantee that for any of the shifts tried, */
/*     the factorization has a small or even moderate element growth. */
/*     There could be Ritz values at both ends of the cluster and despite */
/*     backing off, there are examples where all factorizations tried */
/*     (in IEEE mode, allowing zero pivots & infinities) have INFINITE */
/*     element growth. */
/*     For this reason, we should use PIVMIN in this subroutine so that at */
/*     least the L D L^T factorization exists. It can be checked afterwards */
/*     whether the element growth caused bad residuals/orthogonality. */

/*     Decide whether the code should accept the best among all */
/*     representations despite large element growth or signal INFO=1 */
    nofail = TRUE_;

/*     Compute the average gap length of the cluster */
    clwdth = (d__1 = w[*clend] - w[*clstrt], abs(d__1)) + werr[*clend] + werr[
	    *clstrt];
    avgap = clwdth / (doublereal) (*clend - *clstrt);
    mingap = min(*clgapl,*clgapr);

/*     Initial values for shifts to both ends of cluster */
/*     Computing MIN */
    d__1 = w[*clstrt], d__2 = w[*clend];
    lsigma = min(d__1,d__2) - werr[*clstrt];
/*     Computing MAX */
    d__1 = w[*clstrt], d__2 = w[*clend];
    rsigma = max(d__1,d__2) + werr[*clend];

/*     Use a small fudge to make sure that we really shift to the outside */
    lsigma -= abs(lsigma) * 4. * eps;
    rsigma += abs(rsigma) * 4. * eps;

/*     Compute upper bounds for how much to back off the initial shifts */
    ldmax = mingap * .25 + *pivmin * 2.;
    rdmax = mingap * .25 + *pivmin * 2.;
/*     Computing MAX */
    d__1 = avgap, d__2 = wgap[*clstrt];
    ldelta = max(d__1,d__2) / fact;
/*     Computing MAX */
    d__1 = avgap, d__2 = wgap[*clend - 1];
    rdelta = max(d__1,d__2) / fact;

/*     Initialize the record of the best representation found */
    s = _starpu_dlamch_("S");
    smlgrowth = 1. / s;
    fail = (doublereal) (*n - 1) * mingap / (*spdiam * eps);
    fail2 = (doublereal) (*n - 1) * mingap / (*spdiam * sqrt(eps));
    bestshift = lsigma;
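/*     SMLGROWTH records the smallest element growth observed so far. */
/*     FAIL and FAIL2 are growth thresholds, both proportional to */
/*     (N-1)*MINGAP/SPDIAM: FAIL (scaled by 1/EPS) is used by the final */
/*     fallback below, while FAIL2 (scaled by 1/sqrt(EPS)) gates the */
/*     refined RRR test. */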
/*     while (KTRY <= KTRYMAX) */
    ktry = 0;
    growthbound = *spdiam * 8.;
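/*     The code between the label L5 and the GOTOs below is executed at */
/*     most a few times: twice for the regular back-off (KTRY = 0, 1) and */
/*     once more with FORCER set when the best recorded shift is accepted */
/*     despite large element growth. GROWTHBOUND (8*SPDIAM) is the element */
/*     growth below which a shifted factorization is accepted immediately. */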
L5:
    sawnan1 = FALSE_;
    sawnan2 = FALSE_;

/*     Ensure that we do not back off too much of the initial shifts */
    ldelta = min(ldmax,ldelta);
    rdelta = min(rdmax,rdelta);

/*     Compute the element growth when shifting to both ends of the cluster */
/*     accept the shift if there is no element growth at one of the two ends */

/*     Left end */
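/*     The loop below computes L(+) D(+) L(+)^T = L D L^T - LSIGMA*I via */
/*     a stationary qd-like recurrence: */
/*         S(1)     = -LSIGMA */
/*         DPLUS(i) = D(i) + S(i) */
/*         LPLUS(i) = LD(i) / DPLUS(i) */
/*         S(i+1)   = S(i)*LPLUS(i)*L(i) - LSIGMA */
/*     MAX1 tracks the largest |DPLUS(i)| as a measure of element growth; */
/*     pivots smaller than PIVMIN in magnitude are replaced by -PIVMIN. */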
    s = -lsigma;
    dplus[1] = d__[1] + s;
    if (abs(dplus[1]) < *pivmin) {
	dplus[1] = -(*pivmin);
/*     Need to set SAWNAN1 because refined RRR test should not be used */
/*     in this case */
	sawnan1 = TRUE_;
    }
    max1 = abs(dplus[1]);
    i__1 = *n - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
	lplus[i__] = ld[i__] / dplus[i__];
	s = s * lplus[i__] * l[i__] - lsigma;
	dplus[i__ + 1] = d__[i__ + 1] + s;
	if ((d__1 = dplus[i__ + 1], abs(d__1)) < *pivmin) {
	    dplus[i__ + 1] = -(*pivmin);
/*     Need to set SAWNAN1 because refined RRR test should not be used */
/*     in this case */
	    sawnan1 = TRUE_;
	}
/*     Computing MAX */
	d__2 = max1, d__3 = (d__1 = dplus[i__ + 1], abs(d__1));
	max1 = max(d__2,d__3);
/* L6: */
    }
    sawnan1 = sawnan1 || _starpu_disnan_(&max1);

    if (forcer || max1 <= growthbound && ! sawnan1) {
	*sigma = lsigma;
	shift = 1;
	goto L100;
    }

/*     Right end */
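/*     Same recurrence as above, but shifting by RSIGMA; the candidate */
/*     D(+) is accumulated in WORK(1:N), its L(+) in WORK(N+1:2N-1), and */
/*     MAX2 tracks the corresponding element growth. */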
    s = -rsigma;
    work[1] = d__[1] + s;
    if (abs(work[1]) < *pivmin) {
	work[1] = -(*pivmin);
/*     Need to set SAWNAN2 because refined RRR test should not be used */
/*     in this case */
	sawnan2 = TRUE_;
    }
    max2 = abs(work[1]);
    i__1 = *n - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
	work[*n + i__] = ld[i__] / work[i__];
	s = s * work[*n + i__] * l[i__] - rsigma;
	work[i__ + 1] = d__[i__ + 1] + s;
	if ((d__1 = work[i__ + 1], abs(d__1)) < *pivmin) {
	    work[i__ + 1] = -(*pivmin);
/*     Need to set SAWNAN2 because refined RRR test should not be used */
/*     in this case */
	    sawnan2 = TRUE_;
	}
/*     Computing MAX */
	d__2 = max2, d__3 = (d__1 = work[i__ + 1], abs(d__1));
	max2 = max(d__2,d__3);
/* L7: */
    }
    sawnan2 = sawnan2 || _starpu_disnan_(&max2);

    if (forcer || max2 <= growthbound && ! sawnan2) {
	*sigma = rsigma;
	shift = 2;
	goto L100;
    }

/*     If we are at this point, both shifts led to too much element growth */
/*     Record the better of the two shifts (provided it didn't lead to NaN) */
    if (sawnan1 && sawnan2) {
/*     both MAX1 and MAX2 are NaN */
	goto L50;
    } else {
	if (! sawnan1) {
	    indx = 1;
	    if (max1 <= smlgrowth) {
		smlgrowth = max1;
		bestshift = lsigma;
	    }
	}
	if (! sawnan2) {
	    if (sawnan1 || max2 <= max1) {
		indx = 2;
	    }
	    if (max2 <= smlgrowth) {
		smlgrowth = max2;
		bestshift = rsigma;
	    }
	}
    }

/*     If we are here, both the left and the right shift led to */
/*     element growth. If the element growth is moderate, then */
/*     we may still accept the representation, if it passes a */
/*     refined test for RRR. This test supposes that no NaN occurred. */
/*     Moreover, we use the refined RRR test only for isolated clusters. */
    if (clwdth < mingap / 128. && min(max1,max2) < fail2 && ! sawnan1 && !
	    sawnan2) {
	dorrr1 = TRUE_;
    } else {
	dorrr1 = FALSE_;
    }
    tryrrr1 = TRUE_;
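/*     The refined RRR test below is applied to the candidate with the */
/*     smaller element growth (selected through INDX). PROD accumulates */
/*     products of off-diagonal multipliers, ZNM2 the squared norm of the */
/*     resulting vector, and the candidate shift is accepted when */
/*     RRR1/RRR2 = max_i |D(+)(i)*PROD| / (SPDIAM*sqrt(ZNM2)) is at most 8. */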
    if (tryrrr1 && dorrr1) {
	if (indx == 1) {
	    tmp = (d__1 = dplus[*n], abs(d__1));
	    znm2 = 1.;
	    prod = 1.;
	    oldp = 1.;
	    for (i__ = *n - 1; i__ >= 1; --i__) {
		if (prod <= eps) {
		    prod = dplus[i__ + 1] * work[*n + i__ + 1] / (dplus[i__] *
			     work[*n + i__]) * oldp;
		} else {
		    prod *= (d__1 = work[*n + i__], abs(d__1));
		}
		oldp = prod;
/*     Computing 2nd power */
		d__1 = prod;
		znm2 += d__1 * d__1;
/*     Computing MAX */
		d__2 = tmp, d__3 = (d__1 = dplus[i__] * prod, abs(d__1));
		tmp = max(d__2,d__3);
/* L15: */
	    }
	    rrr1 = tmp / (*spdiam * sqrt(znm2));
	    if (rrr1 <= 8.) {
		*sigma = lsigma;
		shift = 1;
		goto L100;
	    }
	} else if (indx == 2) {
	    tmp = (d__1 = work[*n], abs(d__1));
	    znm2 = 1.;
	    prod = 1.;
	    oldp = 1.;
	    for (i__ = *n - 1; i__ >= 1; --i__) {
		if (prod <= eps) {
		    prod = work[i__ + 1] * lplus[i__ + 1] / (work[i__] *
			    lplus[i__]) * oldp;
		} else {
		    prod *= (d__1 = lplus[i__], abs(d__1));
		}
		oldp = prod;
/*     Computing 2nd power */
		d__1 = prod;
		znm2 += d__1 * d__1;
/*     Computing MAX */
		d__2 = tmp, d__3 = (d__1 = work[i__] * prod, abs(d__1));
		tmp = max(d__2,d__3);
/* L16: */
	    }
	    rrr2 = tmp / (*spdiam * sqrt(znm2));
	    if (rrr2 <= 8.) {
		*sigma = rsigma;
		shift = 2;
		goto L100;
	    }
	}
    }
L50:
    if (ktry < 1) {

/*     If we are here, both shifts failed the RRR test as well. */
/*     Back off to the outside */
/*     Computing MAX */
	d__1 = lsigma - ldelta, d__2 = lsigma - ldmax;
	lsigma = max(d__1,d__2);
/*     Computing MIN */
	d__1 = rsigma + rdelta, d__2 = rsigma + rdmax;
	rsigma = min(d__1,d__2);
	ldelta *= 2.;
	rdelta *= 2.;
	++ktry;
	goto L5;
    } else {

/*     None of the representations investigated satisfied our */
/*     criteria. Take the best one we found. */
	if (smlgrowth < fail || nofail) {
	    lsigma = bestshift;
	    rsigma = bestshift;
	    forcer = TRUE_;
	    goto L5;
	} else {
	    *info = 1;
	    return 0;
	}
    }
L100:
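/*     At this point a shift has been accepted. For SHIFT = 1 the new */
/*     representation is already stored in DPLUS/LPLUS; for SHIFT = 2 it */
/*     still resides in WORK and is copied over below. */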
    if (shift == 1) {
    } else if (shift == 2) {

/*     store new L and D back into DPLUS, LPLUS */
	_starpu_dcopy_(n, &work[1], &c__1, &dplus[1], &c__1);
	i__1 = *n - 1;
	_starpu_dcopy_(&i__1, &work[*n + 1], &c__1, &lplus[1], &c__1);
    }
    return 0;

/*     End of DLARRF */

} /* _starpu_dlarrf_ */