dlar1v.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442
  1. /* dlar1v.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Subroutine */ int _starpu_dlar1v_(integer *n, integer *b1, integer *bn, doublereal
  14. *lambda, doublereal *d__, doublereal *l, doublereal *ld, doublereal *
  15. lld, doublereal *pivmin, doublereal *gaptol, doublereal *z__, logical
  16. *wantnc, integer *negcnt, doublereal *ztz, doublereal *mingma,
  17. integer *r__, integer *isuppz, doublereal *nrminv, doublereal *resid,
  18. doublereal *rqcorr, doublereal *work)
  19. {
  20. /* System generated locals */
  21. integer i__1;
  22. doublereal d__1, d__2, d__3;
  23. /* Builtin functions */
  24. double sqrt(doublereal);
  25. /* Local variables */
  26. integer i__;
  27. doublereal s;
  28. integer r1, r2;
  29. doublereal eps, tmp;
  30. integer neg1, neg2, indp, inds;
  31. doublereal dplus;
  32. extern doublereal _starpu_dlamch_(char *);
  33. extern logical _starpu_disnan_(doublereal *);
  34. integer indlpl, indumn;
  35. doublereal dminus;
  36. logical sawnan1, sawnan2;
  37. /* -- LAPACK auxiliary routine (version 3.2) -- */
  38. /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  39. /* November 2006 */
  40. /* .. Scalar Arguments .. */
  41. /* .. */
  42. /* .. Array Arguments .. */
  43. /* .. */
  44. /* Purpose */
  45. /* ======= */
  46. /* DLAR1V computes the (scaled) r-th column of the inverse of */
  47. /* the sumbmatrix in rows B1 through BN of the tridiagonal matrix */
  48. /* L D L^T - sigma I. When sigma is close to an eigenvalue, the */
  49. /* computed vector is an accurate eigenvector. Usually, r corresponds */
  50. /* to the index where the eigenvector is largest in magnitude. */
  51. /* The following steps accomplish this computation : */
  52. /* (a) Stationary qd transform, L D L^T - sigma I = L(+) D(+) L(+)^T, */
  53. /* (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T, */
  54. /* (c) Computation of the diagonal elements of the inverse of */
  55. /* L D L^T - sigma I by combining the above transforms, and choosing */
  56. /* r as the index where the diagonal of the inverse is (one of the) */
  57. /* largest in magnitude. */
  58. /* (d) Computation of the (scaled) r-th column of the inverse using the */
  59. /* twisted factorization obtained by combining the top part of the */
  60. /* the stationary and the bottom part of the progressive transform. */
  61. /* Arguments */
  62. /* ========= */
  63. /* N (input) INTEGER */
  64. /* The order of the matrix L D L^T. */
  65. /* B1 (input) INTEGER */
  66. /* First index of the submatrix of L D L^T. */
  67. /* BN (input) INTEGER */
  68. /* Last index of the submatrix of L D L^T. */
  69. /* LAMBDA (input) DOUBLE PRECISION */
  70. /* The shift. In order to compute an accurate eigenvector, */
  71. /* LAMBDA should be a good approximation to an eigenvalue */
  72. /* of L D L^T. */
  73. /* L (input) DOUBLE PRECISION array, dimension (N-1) */
  74. /* The (n-1) subdiagonal elements of the unit bidiagonal matrix */
  75. /* L, in elements 1 to N-1. */
  76. /* D (input) DOUBLE PRECISION array, dimension (N) */
  77. /* The n diagonal elements of the diagonal matrix D. */
  78. /* LD (input) DOUBLE PRECISION array, dimension (N-1) */
  79. /* The n-1 elements L(i)*D(i). */
  80. /* LLD (input) DOUBLE PRECISION array, dimension (N-1) */
  81. /* The n-1 elements L(i)*L(i)*D(i). */
  82. /* PIVMIN (input) DOUBLE PRECISION */
  83. /* The minimum pivot in the Sturm sequence. */
  84. /* GAPTOL (input) DOUBLE PRECISION */
  85. /* Tolerance that indicates when eigenvector entries are negligible */
  86. /* w.r.t. their contribution to the residual. */
  87. /* Z (input/output) DOUBLE PRECISION array, dimension (N) */
  88. /* On input, all entries of Z must be set to 0. */
  89. /* On output, Z contains the (scaled) r-th column of the */
  90. /* inverse. The scaling is such that Z(R) equals 1. */
  91. /* WANTNC (input) LOGICAL */
  92. /* Specifies whether NEGCNT has to be computed. */
  93. /* NEGCNT (output) INTEGER */
  94. /* If WANTNC is .TRUE. then NEGCNT = the number of pivots < pivmin */
  95. /* in the matrix factorization L D L^T, and NEGCNT = -1 otherwise. */
  96. /* ZTZ (output) DOUBLE PRECISION */
  97. /* The square of the 2-norm of Z. */
  98. /* MINGMA (output) DOUBLE PRECISION */
  99. /* The reciprocal of the largest (in magnitude) diagonal */
  100. /* element of the inverse of L D L^T - sigma I. */
  101. /* R (input/output) INTEGER */
  102. /* The twist index for the twisted factorization used to */
  103. /* compute Z. */
  104. /* On input, 0 <= R <= N. If R is input as 0, R is set to */
  105. /* the index where (L D L^T - sigma I)^{-1} is largest */
  106. /* in magnitude. If 1 <= R <= N, R is unchanged. */
  107. /* On output, R contains the twist index used to compute Z. */
  108. /* Ideally, R designates the position of the maximum entry in the */
  109. /* eigenvector. */
  110. /* ISUPPZ (output) INTEGER array, dimension (2) */
  111. /* The support of the vector in Z, i.e., the vector Z is */
  112. /* nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ). */
  113. /* NRMINV (output) DOUBLE PRECISION */
  114. /* NRMINV = 1/SQRT( ZTZ ) */
  115. /* RESID (output) DOUBLE PRECISION */
  116. /* The residual of the FP vector. */
  117. /* RESID = ABS( MINGMA )/SQRT( ZTZ ) */
  118. /* RQCORR (output) DOUBLE PRECISION */
  119. /* The Rayleigh Quotient correction to LAMBDA. */
  120. /* RQCORR = MINGMA*TMP */
  121. /* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */
  122. /* Further Details */
  123. /* =============== */
  124. /* Based on contributions by */
  125. /* Beresford Parlett, University of California, Berkeley, USA */
  126. /* Jim Demmel, University of California, Berkeley, USA */
  127. /* Inderjit Dhillon, University of Texas, Austin, USA */
  128. /* Osni Marques, LBNL/NERSC, USA */
  129. /* Christof Voemel, University of California, Berkeley, USA */
  130. /* ===================================================================== */
  131. /* .. Parameters .. */
  132. /* .. */
  133. /* .. Local Scalars .. */
  134. /* .. */
  135. /* .. External Functions .. */
  136. /* .. */
  137. /* .. Intrinsic Functions .. */
  138. /* .. */
  139. /* .. Executable Statements .. */
  140. /* Parameter adjustments */
  141. --work;
  142. --isuppz;
  143. --z__;
  144. --lld;
  145. --ld;
  146. --l;
  147. --d__;
  148. /* Function Body */
  149. eps = _starpu_dlamch_("Precision");
  150. if (*r__ == 0) {
  151. r1 = *b1;
  152. r2 = *bn;
  153. } else {
  154. r1 = *r__;
  155. r2 = *r__;
  156. }
  157. /* Storage for LPLUS */
  158. indlpl = 0;
  159. /* Storage for UMINUS */
  160. indumn = *n;
  161. inds = (*n << 1) + 1;
  162. indp = *n * 3 + 1;
  163. if (*b1 == 1) {
  164. work[inds] = 0.;
  165. } else {
  166. work[inds + *b1 - 1] = lld[*b1 - 1];
  167. }
  168. /* Compute the stationary transform (using the differential form) */
  169. /* until the index R2. */
  170. sawnan1 = FALSE_;
  171. neg1 = 0;
  172. s = work[inds + *b1 - 1] - *lambda;
  173. i__1 = r1 - 1;
  174. for (i__ = *b1; i__ <= i__1; ++i__) {
  175. dplus = d__[i__] + s;
  176. work[indlpl + i__] = ld[i__] / dplus;
  177. if (dplus < 0.) {
  178. ++neg1;
  179. }
  180. work[inds + i__] = s * work[indlpl + i__] * l[i__];
  181. s = work[inds + i__] - *lambda;
  182. /* L50: */
  183. }
  184. sawnan1 = _starpu_disnan_(&s);
  185. if (sawnan1) {
  186. goto L60;
  187. }
  188. i__1 = r2 - 1;
  189. for (i__ = r1; i__ <= i__1; ++i__) {
  190. dplus = d__[i__] + s;
  191. work[indlpl + i__] = ld[i__] / dplus;
  192. work[inds + i__] = s * work[indlpl + i__] * l[i__];
  193. s = work[inds + i__] - *lambda;
  194. /* L51: */
  195. }
  196. sawnan1 = _starpu_disnan_(&s);
  197. L60:
  198. if (sawnan1) {
  199. /* Runs a slower version of the above loop if a NaN is detected */
  200. neg1 = 0;
  201. s = work[inds + *b1 - 1] - *lambda;
  202. i__1 = r1 - 1;
  203. for (i__ = *b1; i__ <= i__1; ++i__) {
  204. dplus = d__[i__] + s;
  205. if (abs(dplus) < *pivmin) {
  206. dplus = -(*pivmin);
  207. }
  208. work[indlpl + i__] = ld[i__] / dplus;
  209. if (dplus < 0.) {
  210. ++neg1;
  211. }
  212. work[inds + i__] = s * work[indlpl + i__] * l[i__];
  213. if (work[indlpl + i__] == 0.) {
  214. work[inds + i__] = lld[i__];
  215. }
  216. s = work[inds + i__] - *lambda;
  217. /* L70: */
  218. }
  219. i__1 = r2 - 1;
  220. for (i__ = r1; i__ <= i__1; ++i__) {
  221. dplus = d__[i__] + s;
  222. if (abs(dplus) < *pivmin) {
  223. dplus = -(*pivmin);
  224. }
  225. work[indlpl + i__] = ld[i__] / dplus;
  226. work[inds + i__] = s * work[indlpl + i__] * l[i__];
  227. if (work[indlpl + i__] == 0.) {
  228. work[inds + i__] = lld[i__];
  229. }
  230. s = work[inds + i__] - *lambda;
  231. /* L71: */
  232. }
  233. }
  234. /* Compute the progressive transform (using the differential form) */
  235. /* until the index R1 */
  236. sawnan2 = FALSE_;
  237. neg2 = 0;
  238. work[indp + *bn - 1] = d__[*bn] - *lambda;
  239. i__1 = r1;
  240. for (i__ = *bn - 1; i__ >= i__1; --i__) {
  241. dminus = lld[i__] + work[indp + i__];
  242. tmp = d__[i__] / dminus;
  243. if (dminus < 0.) {
  244. ++neg2;
  245. }
  246. work[indumn + i__] = l[i__] * tmp;
  247. work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda;
  248. /* L80: */
  249. }
  250. tmp = work[indp + r1 - 1];
  251. sawnan2 = _starpu_disnan_(&tmp);
  252. if (sawnan2) {
  253. /* Runs a slower version of the above loop if a NaN is detected */
  254. neg2 = 0;
  255. i__1 = r1;
  256. for (i__ = *bn - 1; i__ >= i__1; --i__) {
  257. dminus = lld[i__] + work[indp + i__];
  258. if (abs(dminus) < *pivmin) {
  259. dminus = -(*pivmin);
  260. }
  261. tmp = d__[i__] / dminus;
  262. if (dminus < 0.) {
  263. ++neg2;
  264. }
  265. work[indumn + i__] = l[i__] * tmp;
  266. work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda;
  267. if (tmp == 0.) {
  268. work[indp + i__ - 1] = d__[i__] - *lambda;
  269. }
  270. /* L100: */
  271. }
  272. }
  273. /* Find the index (from R1 to R2) of the largest (in magnitude) */
  274. /* diagonal element of the inverse */
  275. *mingma = work[inds + r1 - 1] + work[indp + r1 - 1];
  276. if (*mingma < 0.) {
  277. ++neg1;
  278. }
  279. if (*wantnc) {
  280. *negcnt = neg1 + neg2;
  281. } else {
  282. *negcnt = -1;
  283. }
  284. if (abs(*mingma) == 0.) {
  285. *mingma = eps * work[inds + r1 - 1];
  286. }
  287. *r__ = r1;
  288. i__1 = r2 - 1;
  289. for (i__ = r1; i__ <= i__1; ++i__) {
  290. tmp = work[inds + i__] + work[indp + i__];
  291. if (tmp == 0.) {
  292. tmp = eps * work[inds + i__];
  293. }
  294. if (abs(tmp) <= abs(*mingma)) {
  295. *mingma = tmp;
  296. *r__ = i__ + 1;
  297. }
  298. /* L110: */
  299. }
  300. /* Compute the FP vector: solve N^T v = e_r */
  301. isuppz[1] = *b1;
  302. isuppz[2] = *bn;
  303. z__[*r__] = 1.;
  304. *ztz = 1.;
  305. /* Compute the FP vector upwards from R */
  306. if (! sawnan1 && ! sawnan2) {
  307. i__1 = *b1;
  308. for (i__ = *r__ - 1; i__ >= i__1; --i__) {
  309. z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]);
  310. if (((d__1 = z__[i__], abs(d__1)) + (d__2 = z__[i__ + 1], abs(
  311. d__2))) * (d__3 = ld[i__], abs(d__3)) < *gaptol) {
  312. z__[i__] = 0.;
  313. isuppz[1] = i__ + 1;
  314. goto L220;
  315. }
  316. *ztz += z__[i__] * z__[i__];
  317. /* L210: */
  318. }
  319. L220:
  320. ;
  321. } else {
  322. /* Run slower loop if NaN occurred. */
  323. i__1 = *b1;
  324. for (i__ = *r__ - 1; i__ >= i__1; --i__) {
  325. if (z__[i__ + 1] == 0.) {
  326. z__[i__] = -(ld[i__ + 1] / ld[i__]) * z__[i__ + 2];
  327. } else {
  328. z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]);
  329. }
  330. if (((d__1 = z__[i__], abs(d__1)) + (d__2 = z__[i__ + 1], abs(
  331. d__2))) * (d__3 = ld[i__], abs(d__3)) < *gaptol) {
  332. z__[i__] = 0.;
  333. isuppz[1] = i__ + 1;
  334. goto L240;
  335. }
  336. *ztz += z__[i__] * z__[i__];
  337. /* L230: */
  338. }
  339. L240:
  340. ;
  341. }
  342. /* Compute the FP vector downwards from R in blocks of size BLKSIZ */
  343. if (! sawnan1 && ! sawnan2) {
  344. i__1 = *bn - 1;
  345. for (i__ = *r__; i__ <= i__1; ++i__) {
  346. z__[i__ + 1] = -(work[indumn + i__] * z__[i__]);
  347. if (((d__1 = z__[i__], abs(d__1)) + (d__2 = z__[i__ + 1], abs(
  348. d__2))) * (d__3 = ld[i__], abs(d__3)) < *gaptol) {
  349. z__[i__ + 1] = 0.;
  350. isuppz[2] = i__;
  351. goto L260;
  352. }
  353. *ztz += z__[i__ + 1] * z__[i__ + 1];
  354. /* L250: */
  355. }
  356. L260:
  357. ;
  358. } else {
  359. /* Run slower loop if NaN occurred. */
  360. i__1 = *bn - 1;
  361. for (i__ = *r__; i__ <= i__1; ++i__) {
  362. if (z__[i__] == 0.) {
  363. z__[i__ + 1] = -(ld[i__ - 1] / ld[i__]) * z__[i__ - 1];
  364. } else {
  365. z__[i__ + 1] = -(work[indumn + i__] * z__[i__]);
  366. }
  367. if (((d__1 = z__[i__], abs(d__1)) + (d__2 = z__[i__ + 1], abs(
  368. d__2))) * (d__3 = ld[i__], abs(d__3)) < *gaptol) {
  369. z__[i__ + 1] = 0.;
  370. isuppz[2] = i__;
  371. goto L280;
  372. }
  373. *ztz += z__[i__ + 1] * z__[i__ + 1];
  374. /* L270: */
  375. }
  376. L280:
  377. ;
  378. }
  379. /* Compute quantities for convergence test */
  380. tmp = 1. / *ztz;
  381. *nrminv = sqrt(tmp);
  382. *resid = abs(*mingma) * *nrminv;
  383. *rqcorr = *mingma * tmp;
  384. return 0;
  385. /* End of DLAR1V */
  386. } /* _starpu_dlar1v_ */