dlag2.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. /* dlag2.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Subroutine */ int _starpu_dlag2_(doublereal *a, integer *lda, doublereal *b,
  14. integer *ldb, doublereal *safmin, doublereal *scale1, doublereal *
  15. scale2, doublereal *wr1, doublereal *wr2, doublereal *wi)
  16. {
  17. /* System generated locals */
  18. integer a_dim1, a_offset, b_dim1, b_offset;
  19. doublereal d__1, d__2, d__3, d__4, d__5, d__6;
  20. /* Builtin functions */
  21. double sqrt(doublereal), d_sign(doublereal *, doublereal *);
  22. /* Local variables */
  23. doublereal r__, c1, c2, c3, c4, c5, s1, s2, a11, a12, a21, a22, b11, b12,
  24. b22, pp, qq, ss, as11, as12, as22, sum, abi22, diff, bmin, wbig,
  25. wabs, wdet, binv11, binv22, discr, anorm, bnorm, bsize, shift,
  26. rtmin, rtmax, wsize, ascale, bscale, wscale, safmax, wsmall;
  27. /* -- LAPACK auxiliary routine (version 3.2) -- */
  28. /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  29. /* November 2006 */
  30. /* .. Scalar Arguments .. */
  31. /* .. */
  32. /* .. Array Arguments .. */
  33. /* .. */
  34. /* Purpose */
  35. /* ======= */
  36. /* DLAG2 computes the eigenvalues of a 2 x 2 generalized eigenvalue */
  37. /* problem A - w B, with scaling as necessary to avoid over-/underflow. */
  38. /* The scaling factor "s" results in a modified eigenvalue equation */
  39. /* s A - w B */
  40. /* where s is a non-negative scaling factor chosen so that w, w B, */
  41. /* and s A do not overflow and, if possible, do not underflow, either. */
  42. /* Arguments */
  43. /* ========= */
  44. /* A (input) DOUBLE PRECISION array, dimension (LDA, 2) */
  45. /* On entry, the 2 x 2 matrix A. It is assumed that its 1-norm */
  46. /* is less than 1/SAFMIN. Entries less than */
  47. /* sqrt(SAFMIN)*norm(A) are subject to being treated as zero. */
  48. /* LDA (input) INTEGER */
  49. /* The leading dimension of the array A. LDA >= 2. */
  50. /* B (input) DOUBLE PRECISION array, dimension (LDB, 2) */
  51. /* On entry, the 2 x 2 upper triangular matrix B. It is */
  52. /* assumed that the one-norm of B is less than 1/SAFMIN. The */
  53. /* diagonals should be at least sqrt(SAFMIN) times the largest */
  54. /* element of B (in absolute value); if a diagonal is smaller */
  55. /* than that, then +/- sqrt(SAFMIN) will be used instead of */
  56. /* that diagonal. */
  57. /* LDB (input) INTEGER */
  58. /* The leading dimension of the array B. LDB >= 2. */
  59. /* SAFMIN (input) DOUBLE PRECISION */
  60. /* The smallest positive number s.t. 1/SAFMIN does not */
  61. /* overflow. (This should always be DLAMCH('S') -- it is an */
  62. /* argument in order to avoid having to call DLAMCH frequently.) */
  63. /* SCALE1 (output) DOUBLE PRECISION */
  64. /* A scaling factor used to avoid over-/underflow in the */
  65. /* eigenvalue equation which defines the first eigenvalue. If */
  66. /* the eigenvalues are complex, then the eigenvalues are */
  67. /* ( WR1 +/- WI i ) / SCALE1 (which may lie outside the */
  68. /* exponent range of the machine), SCALE1=SCALE2, and SCALE1 */
  69. /* will always be positive. If the eigenvalues are real, then */
  70. /* the first (real) eigenvalue is WR1 / SCALE1 , but this may */
  71. /* overflow or underflow, and in fact, SCALE1 may be zero or */
  72. /* less than the underflow threshhold if the exact eigenvalue */
  73. /* is sufficiently large. */
  74. /* SCALE2 (output) DOUBLE PRECISION */
  75. /* A scaling factor used to avoid over-/underflow in the */
  76. /* eigenvalue equation which defines the second eigenvalue. If */
  77. /* the eigenvalues are complex, then SCALE2=SCALE1. If the */
  78. /* eigenvalues are real, then the second (real) eigenvalue is */
  79. /* WR2 / SCALE2 , but this may overflow or underflow, and in */
  80. /* fact, SCALE2 may be zero or less than the underflow */
  81. /* threshhold if the exact eigenvalue is sufficiently large. */
  82. /* WR1 (output) DOUBLE PRECISION */
  83. /* If the eigenvalue is real, then WR1 is SCALE1 times the */
  84. /* eigenvalue closest to the (2,2) element of A B**(-1). If the */
  85. /* eigenvalue is complex, then WR1=WR2 is SCALE1 times the real */
  86. /* part of the eigenvalues. */
  87. /* WR2 (output) DOUBLE PRECISION */
  88. /* If the eigenvalue is real, then WR2 is SCALE2 times the */
  89. /* other eigenvalue. If the eigenvalue is complex, then */
  90. /* WR1=WR2 is SCALE1 times the real part of the eigenvalues. */
  91. /* WI (output) DOUBLE PRECISION */
  92. /* If the eigenvalue is real, then WI is zero. If the */
  93. /* eigenvalue is complex, then WI is SCALE1 times the imaginary */
  94. /* part of the eigenvalues. WI will always be non-negative. */
  95. /* ===================================================================== */
  96. /* .. Parameters .. */
  97. /* .. */
  98. /* .. Local Scalars .. */
  99. /* .. */
  100. /* .. Intrinsic Functions .. */
  101. /* .. */
  102. /* .. Executable Statements .. */
  103. /* Parameter adjustments */
  104. a_dim1 = *lda;
  105. a_offset = 1 + a_dim1;
  106. a -= a_offset;
  107. b_dim1 = *ldb;
  108. b_offset = 1 + b_dim1;
  109. b -= b_offset;
  110. /* Function Body */
  111. rtmin = sqrt(*safmin);
  112. rtmax = 1. / rtmin;
  113. safmax = 1. / *safmin;
  114. /* Scale A */
  115. /* Computing MAX */
  116. d__5 = (d__1 = a[a_dim1 + 1], abs(d__1)) + (d__2 = a[a_dim1 + 2], abs(
  117. d__2)), d__6 = (d__3 = a[(a_dim1 << 1) + 1], abs(d__3)) + (d__4 =
  118. a[(a_dim1 << 1) + 2], abs(d__4)), d__5 = max(d__5,d__6);
  119. anorm = max(d__5,*safmin);
  120. ascale = 1. / anorm;
  121. a11 = ascale * a[a_dim1 + 1];
  122. a21 = ascale * a[a_dim1 + 2];
  123. a12 = ascale * a[(a_dim1 << 1) + 1];
  124. a22 = ascale * a[(a_dim1 << 1) + 2];
  125. /* Perturb B if necessary to insure non-singularity */
  126. b11 = b[b_dim1 + 1];
  127. b12 = b[(b_dim1 << 1) + 1];
  128. b22 = b[(b_dim1 << 1) + 2];
  129. /* Computing MAX */
  130. d__1 = abs(b11), d__2 = abs(b12), d__1 = max(d__1,d__2), d__2 = abs(b22),
  131. d__1 = max(d__1,d__2);
  132. bmin = rtmin * max(d__1,rtmin);
  133. if (abs(b11) < bmin) {
  134. b11 = d_sign(&bmin, &b11);
  135. }
  136. if (abs(b22) < bmin) {
  137. b22 = d_sign(&bmin, &b22);
  138. }
  139. /* Scale B */
  140. /* Computing MAX */
  141. d__1 = abs(b11), d__2 = abs(b12) + abs(b22), d__1 = max(d__1,d__2);
  142. bnorm = max(d__1,*safmin);
  143. /* Computing MAX */
  144. d__1 = abs(b11), d__2 = abs(b22);
  145. bsize = max(d__1,d__2);
  146. bscale = 1. / bsize;
  147. b11 *= bscale;
  148. b12 *= bscale;
  149. b22 *= bscale;
  150. /* Compute larger eigenvalue by method described by C. van Loan */
  151. /* ( AS is A shifted by -SHIFT*B ) */
  152. binv11 = 1. / b11;
  153. binv22 = 1. / b22;
  154. s1 = a11 * binv11;
  155. s2 = a22 * binv22;
  156. if (abs(s1) <= abs(s2)) {
  157. as12 = a12 - s1 * b12;
  158. as22 = a22 - s1 * b22;
  159. ss = a21 * (binv11 * binv22);
  160. abi22 = as22 * binv22 - ss * b12;
  161. pp = abi22 * .5;
  162. shift = s1;
  163. } else {
  164. as12 = a12 - s2 * b12;
  165. as11 = a11 - s2 * b11;
  166. ss = a21 * (binv11 * binv22);
  167. abi22 = -ss * b12;
  168. pp = (as11 * binv11 + abi22) * .5;
  169. shift = s2;
  170. }
  171. qq = ss * as12;
  172. if ((d__1 = pp * rtmin, abs(d__1)) >= 1.) {
  173. /* Computing 2nd power */
  174. d__1 = rtmin * pp;
  175. discr = d__1 * d__1 + qq * *safmin;
  176. r__ = sqrt((abs(discr))) * rtmax;
  177. } else {
  178. /* Computing 2nd power */
  179. d__1 = pp;
  180. if (d__1 * d__1 + abs(qq) <= *safmin) {
  181. /* Computing 2nd power */
  182. d__1 = rtmax * pp;
  183. discr = d__1 * d__1 + qq * safmax;
  184. r__ = sqrt((abs(discr))) * rtmin;
  185. } else {
  186. /* Computing 2nd power */
  187. d__1 = pp;
  188. discr = d__1 * d__1 + qq;
  189. r__ = sqrt((abs(discr)));
  190. }
  191. }
  192. /* Note: the test of R in the following IF is to cover the case when */
  193. /* DISCR is small and negative and is flushed to zero during */
  194. /* the calculation of R. On machines which have a consistent */
  195. /* flush-to-zero threshhold and handle numbers above that */
  196. /* threshhold correctly, it would not be necessary. */
  197. if (discr >= 0. || r__ == 0.) {
  198. sum = pp + d_sign(&r__, &pp);
  199. diff = pp - d_sign(&r__, &pp);
  200. wbig = shift + sum;
  201. /* Compute smaller eigenvalue */
  202. wsmall = shift + diff;
  203. /* Computing MAX */
  204. d__1 = abs(wsmall);
  205. if (abs(wbig) * .5 > max(d__1,*safmin)) {
  206. wdet = (a11 * a22 - a12 * a21) * (binv11 * binv22);
  207. wsmall = wdet / wbig;
  208. }
  209. /* Choose (real) eigenvalue closest to 2,2 element of A*B**(-1) */
  210. /* for WR1. */
  211. if (pp > abi22) {
  212. *wr1 = min(wbig,wsmall);
  213. *wr2 = max(wbig,wsmall);
  214. } else {
  215. *wr1 = max(wbig,wsmall);
  216. *wr2 = min(wbig,wsmall);
  217. }
  218. *wi = 0.;
  219. } else {
  220. /* Complex eigenvalues */
  221. *wr1 = shift + pp;
  222. *wr2 = *wr1;
  223. *wi = r__;
  224. }
  225. /* Further scaling to avoid underflow and overflow in computing */
  226. /* SCALE1 and overflow in computing w*B. */
  227. /* This scale factor (WSCALE) is bounded from above using C1 and C2, */
  228. /* and from below using C3 and C4. */
  229. /* C1 implements the condition s A must never overflow. */
  230. /* C2 implements the condition w B must never overflow. */
  231. /* C3, with C2, */
  232. /* implement the condition that s A - w B must never overflow. */
  233. /* C4 implements the condition s should not underflow. */
  234. /* C5 implements the condition max(s,|w|) should be at least 2. */
  235. c1 = bsize * (*safmin * max(1.,ascale));
  236. c2 = *safmin * max(1.,bnorm);
  237. c3 = bsize * *safmin;
  238. if (ascale <= 1. && bsize <= 1.) {
  239. /* Computing MIN */
  240. d__1 = 1., d__2 = ascale / *safmin * bsize;
  241. c4 = min(d__1,d__2);
  242. } else {
  243. c4 = 1.;
  244. }
  245. if (ascale <= 1. || bsize <= 1.) {
  246. /* Computing MIN */
  247. d__1 = 1., d__2 = ascale * bsize;
  248. c5 = min(d__1,d__2);
  249. } else {
  250. c5 = 1.;
  251. }
  252. /* Scale first eigenvalue */
  253. wabs = abs(*wr1) + abs(*wi);
  254. /* Computing MAX */
  255. /* Computing MIN */
  256. d__3 = c4, d__4 = max(wabs,c5) * .5;
  257. d__1 = max(*safmin,c1), d__2 = (wabs * c2 + c3) * 1.0000100000000001,
  258. d__1 = max(d__1,d__2), d__2 = min(d__3,d__4);
  259. wsize = max(d__1,d__2);
  260. if (wsize != 1.) {
  261. wscale = 1. / wsize;
  262. if (wsize > 1.) {
  263. *scale1 = max(ascale,bsize) * wscale * min(ascale,bsize);
  264. } else {
  265. *scale1 = min(ascale,bsize) * wscale * max(ascale,bsize);
  266. }
  267. *wr1 *= wscale;
  268. if (*wi != 0.) {
  269. *wi *= wscale;
  270. *wr2 = *wr1;
  271. *scale2 = *scale1;
  272. }
  273. } else {
  274. *scale1 = ascale * bsize;
  275. *scale2 = *scale1;
  276. }
  277. /* Scale second eigenvalue (if real) */
  278. if (*wi == 0.) {
  279. /* Computing MAX */
  280. /* Computing MIN */
  281. /* Computing MAX */
  282. d__5 = abs(*wr2);
  283. d__3 = c4, d__4 = max(d__5,c5) * .5;
  284. d__1 = max(*safmin,c1), d__2 = (abs(*wr2) * c2 + c3) *
  285. 1.0000100000000001, d__1 = max(d__1,d__2), d__2 = min(d__3,
  286. d__4);
  287. wsize = max(d__1,d__2);
  288. if (wsize != 1.) {
  289. wscale = 1. / wsize;
  290. if (wsize > 1.) {
  291. *scale2 = max(ascale,bsize) * wscale * min(ascale,bsize);
  292. } else {
  293. *scale2 = min(ascale,bsize) * wscale * max(ascale,bsize);
  294. }
  295. *wr2 *= wscale;
  296. } else {
  297. *scale2 = ascale * bsize;
  298. }
  299. }
  300. /* End of DLAG2 */
  301. return 0;
  302. } /* _starpu_dlag2_ */