dlaneg.c 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219
  1. /* dlaneg.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. integer _starpu_dlaneg_(integer *n, doublereal *d__, doublereal *lld, doublereal *
  14. sigma, doublereal *pivmin, integer *r__)
  15. {
  16. /* System generated locals */
  17. integer ret_val, i__1, i__2, i__3, i__4;
  18. /* Local variables */
  19. integer j;
  20. doublereal p, t;
  21. integer bj;
  22. doublereal tmp;
  23. integer neg1, neg2;
  24. doublereal bsav, gamma, dplus;
  25. extern logical _starpu_disnan_(doublereal *);
  26. integer negcnt;
  27. logical sawnan;
  28. doublereal dminus;
  29. /* -- LAPACK auxiliary routine (version 3.2) -- */
  30. /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  31. /* November 2006 */
  32. /* .. Scalar Arguments .. */
  33. /* .. */
  34. /* .. Array Arguments .. */
  35. /* .. */
  36. /* Purpose */
  37. /* ======= */
  38. /* DLANEG computes the Sturm count, the number of negative pivots */
  39. /* encountered while factoring tridiagonal T - sigma I = L D L^T. */
  40. /* This implementation works directly on the factors without forming */
  41. /* the tridiagonal matrix T. The Sturm count is also the number of */
  42. /* eigenvalues of T less than sigma. */
  43. /* This routine is called from DLARRB. */
  44. /* The current routine does not use the PIVMIN parameter but rather */
  45. /* requires IEEE-754 propagation of Infinities and NaNs. This */
  46. /* routine also has no input range restrictions but does require */
  47. /* default exception handling such that x/0 produces Inf when x is */
  48. /* non-zero, and Inf/Inf produces NaN. For more information, see: */
  49. /* Marques, Riedy, and Voemel, "Benefits of IEEE-754 Features in */
  50. /* Modern Symmetric Tridiagonal Eigensolvers," SIAM Journal on */
  51. /* Scientific Computing, v28, n5, 2006. DOI 10.1137/050641624 */
  52. /* (Tech report version in LAWN 172 with the same title.) */
  53. /* Arguments */
  54. /* ========= */
  55. /* N (input) INTEGER */
  56. /* The order of the matrix. */
  57. /* D (input) DOUBLE PRECISION array, dimension (N) */
  58. /* The N diagonal elements of the diagonal matrix D. */
  59. /* LLD (input) DOUBLE PRECISION array, dimension (N-1) */
  60. /* The (N-1) elements L(i)*L(i)*D(i). */
  61. /* SIGMA (input) DOUBLE PRECISION */
  62. /* Shift amount in T - sigma I = L D L^T. */
  63. /* PIVMIN (input) DOUBLE PRECISION */
  64. /* The minimum pivot in the Sturm sequence. May be used */
  65. /* when zero pivots are encountered on non-IEEE-754 */
  66. /* architectures. */
  67. /* R (input) INTEGER */
  68. /* The twist index for the twisted factorization that is used */
  69. /* for the negcount. */
  70. /* Further Details */
  71. /* =============== */
  72. /* Based on contributions by */
  73. /* Osni Marques, LBNL/NERSC, USA */
  74. /* Christof Voemel, University of California, Berkeley, USA */
  75. /* Jason Riedy, University of California, Berkeley, USA */
  76. /* ===================================================================== */
  77. /* .. Parameters .. */
  78. /* Some architectures propagate Infinities and NaNs very slowly, so */
  79. /* the code computes counts in BLKLEN chunks. Then a NaN can */
  80. /* propagate at most BLKLEN columns before being detected. This is */
  81. /* not a general tuning parameter; it needs only to be just large */
  82. /* enough that the overhead is tiny in common cases. */
  83. /* .. */
  84. /* .. Local Scalars .. */
  85. /* .. */
  86. /* .. Intrinsic Functions .. */
  87. /* .. */
  88. /* .. External Functions .. */
  89. /* .. */
  90. /* .. Executable Statements .. */
  91. /* Parameter adjustments */
  92. --lld;
  93. --d__;
  94. /* Function Body */
  95. negcnt = 0;
  96. /* I) upper part: L D L^T - SIGMA I = L+ D+ L+^T */
  97. t = -(*sigma);
  98. i__1 = *r__ - 1;
  99. for (bj = 1; bj <= i__1; bj += 128) {
  100. neg1 = 0;
  101. bsav = t;
  102. /* Computing MIN */
  103. i__3 = bj + 127, i__4 = *r__ - 1;
  104. i__2 = min(i__3,i__4);
  105. for (j = bj; j <= i__2; ++j) {
  106. dplus = d__[j] + t;
  107. if (dplus < 0.) {
  108. ++neg1;
  109. }
  110. tmp = t / dplus;
  111. t = tmp * lld[j] - *sigma;
  112. /* L21: */
  113. }
  114. sawnan = _starpu_disnan_(&t);
  115. /* Run a slower version of the above loop if a NaN is detected. */
  116. /* A NaN should occur only with a zero pivot after an infinite */
  117. /* pivot. In that case, substituting 1 for T/DPLUS is the */
  118. /* correct limit. */
  119. if (sawnan) {
  120. neg1 = 0;
  121. t = bsav;
  122. /* Computing MIN */
  123. i__3 = bj + 127, i__4 = *r__ - 1;
  124. i__2 = min(i__3,i__4);
  125. for (j = bj; j <= i__2; ++j) {
  126. dplus = d__[j] + t;
  127. if (dplus < 0.) {
  128. ++neg1;
  129. }
  130. tmp = t / dplus;
  131. if (_starpu_disnan_(&tmp)) {
  132. tmp = 1.;
  133. }
  134. t = tmp * lld[j] - *sigma;
  135. /* L22: */
  136. }
  137. }
  138. negcnt += neg1;
  139. /* L210: */
  140. }
  141. /* II) lower part: L D L^T - SIGMA I = U- D- U-^T */
  142. p = d__[*n] - *sigma;
  143. i__1 = *r__;
  144. for (bj = *n - 1; bj >= i__1; bj += -128) {
  145. neg2 = 0;
  146. bsav = p;
  147. /* Computing MAX */
  148. i__3 = bj - 127;
  149. i__2 = max(i__3,*r__);
  150. for (j = bj; j >= i__2; --j) {
  151. dminus = lld[j] + p;
  152. if (dminus < 0.) {
  153. ++neg2;
  154. }
  155. tmp = p / dminus;
  156. p = tmp * d__[j] - *sigma;
  157. /* L23: */
  158. }
  159. sawnan = _starpu_disnan_(&p);
  160. /* As above, run a slower version that substitutes 1 for Inf/Inf. */
  161. if (sawnan) {
  162. neg2 = 0;
  163. p = bsav;
  164. /* Computing MAX */
  165. i__3 = bj - 127;
  166. i__2 = max(i__3,*r__);
  167. for (j = bj; j >= i__2; --j) {
  168. dminus = lld[j] + p;
  169. if (dminus < 0.) {
  170. ++neg2;
  171. }
  172. tmp = p / dminus;
  173. if (_starpu_disnan_(&tmp)) {
  174. tmp = 1.;
  175. }
  176. p = tmp * d__[j] - *sigma;
  177. /* L24: */
  178. }
  179. }
  180. negcnt += neg2;
  181. /* L230: */
  182. }
  183. /* III) Twist index */
  184. /* T was shifted by SIGMA initially. */
  185. gamma = t + *sigma + p;
  186. if (gamma < 0.) {
  187. ++negcnt;
  188. }
  189. ret_val = negcnt;
  190. return ret_val;
  191. } /* _starpu_dlaneg_ */