dla_syrpvgrw.c 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  1. /* _starpu_dla_syrpvgrw.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. doublereal _starpu_dla_syrpvgrw__(char *uplo, integer *n, integer *info, doublereal *
  14. a, integer *lda, doublereal *af, integer *ldaf, integer *ipiv,
  15. doublereal *work, ftnlen uplo_len)
  16. {
  17. /* System generated locals */
  18. integer a_dim1, a_offset, af_dim1, af_offset, i__1, i__2;
  19. doublereal ret_val, d__1, d__2, d__3;
  20. /* Local variables */
  21. integer i__, j, k, kp;
  22. doublereal tmp, amax, umax;
  23. extern logical _starpu_lsame_(char *, char *);
  24. integer ncols;
  25. logical upper;
  26. doublereal rpvgrw;
  27. /* -- LAPACK routine (version 3.2.1) -- */
  28. /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */
  29. /* -- Jason Riedy of Univ. of California Berkeley. -- */
  30. /* -- April 2009 -- */
  31. /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
  32. /* -- Univ. of California Berkeley and NAG Ltd. -- */
  33. /* .. */
  34. /* .. Scalar Arguments .. */
  35. /* .. */
  36. /* .. Array Arguments .. */
  37. /* .. */
  38. /* Purpose */
  39. /* ======= */
  40. /* DLA_SYRPVGRW computes the reciprocal pivot growth factor */
  41. /* norm(A)/norm(U). The "max absolute element" norm is used. If this is */
  42. /* much less than 1, the stability of the LU factorization of the */
  43. /* (equilibrated) matrix A could be poor. This also means that the */
  44. /* solution X, estimated condition numbers, and error bounds could be */
  45. /* unreliable. */
  46. /* Arguments */
  47. /* ========= */
  48. /* UPLO (input) CHARACTER*1 */
  49. /* = 'U': Upper triangle of A is stored; */
  50. /* = 'L': Lower triangle of A is stored. */
  51. /* N (input) INTEGER */
  52. /* The number of linear equations, i.e., the order of the */
  53. /* matrix A. N >= 0. */
  54. /* INFO (input) INTEGER */
  55. /* The value of INFO returned from DSYTRF, .i.e., the pivot in */
  56. /* column INFO is exactly 0. */
  57. /* NCOLS (input) INTEGER */
  58. /* The number of columns of the matrix A. NCOLS >= 0. */
  59. /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */
  60. /* On entry, the N-by-N matrix A. */
  61. /* LDA (input) INTEGER */
  62. /* The leading dimension of the array A. LDA >= max(1,N). */
  63. /* AF (input) DOUBLE PRECISION array, dimension (LDAF,N) */
  64. /* The block diagonal matrix D and the multipliers used to */
  65. /* obtain the factor U or L as computed by DSYTRF. */
  66. /* LDAF (input) INTEGER */
  67. /* The leading dimension of the array AF. LDAF >= max(1,N). */
  68. /* IPIV (input) INTEGER array, dimension (N) */
  69. /* Details of the interchanges and the block structure of D */
  70. /* as determined by DSYTRF. */
  71. /* WORK (input) DOUBLE PRECISION array, dimension (2*N) */
  72. /* ===================================================================== */
  73. /* .. Local Scalars .. */
  74. /* .. */
  75. /* .. Intrinsic Functions .. */
  76. /* .. */
  77. /* .. External Functions .. */
  78. /* .. */
  79. /* .. Executable Statements .. */
  80. /* Parameter adjustments */
  81. a_dim1 = *lda;
  82. a_offset = 1 + a_dim1;
  83. a -= a_offset;
  84. af_dim1 = *ldaf;
  85. af_offset = 1 + af_dim1;
  86. af -= af_offset;
  87. --ipiv;
  88. --work;
  89. /* Function Body */
  90. upper = _starpu_lsame_("Upper", uplo);
  91. if (*info == 0) {
  92. if (upper) {
  93. ncols = 1;
  94. } else {
  95. ncols = *n;
  96. }
  97. } else {
  98. ncols = *info;
  99. }
  100. rpvgrw = 1.;
  101. i__1 = *n << 1;
  102. for (i__ = 1; i__ <= i__1; ++i__) {
  103. work[i__] = 0.;
  104. }
  105. /* Find the max magnitude entry of each column of A. Compute the max */
  106. /* for all N columns so we can apply the pivot permutation while */
  107. /* looping below. Assume a full factorization is the common case. */
  108. if (upper) {
  109. i__1 = *n;
  110. for (j = 1; j <= i__1; ++j) {
  111. i__2 = j;
  112. for (i__ = 1; i__ <= i__2; ++i__) {
  113. /* Computing MAX */
  114. d__2 = (d__1 = a[i__ + j * a_dim1], abs(d__1)), d__3 = work[*
  115. n + i__];
  116. work[*n + i__] = max(d__2,d__3);
  117. /* Computing MAX */
  118. d__2 = (d__1 = a[i__ + j * a_dim1], abs(d__1)), d__3 = work[*
  119. n + j];
  120. work[*n + j] = max(d__2,d__3);
  121. }
  122. }
  123. } else {
  124. i__1 = *n;
  125. for (j = 1; j <= i__1; ++j) {
  126. i__2 = *n;
  127. for (i__ = j; i__ <= i__2; ++i__) {
  128. /* Computing MAX */
  129. d__2 = (d__1 = a[i__ + j * a_dim1], abs(d__1)), d__3 = work[*
  130. n + i__];
  131. work[*n + i__] = max(d__2,d__3);
  132. /* Computing MAX */
  133. d__2 = (d__1 = a[i__ + j * a_dim1], abs(d__1)), d__3 = work[*
  134. n + j];
  135. work[*n + j] = max(d__2,d__3);
  136. }
  137. }
  138. }
  139. /* Now find the max magnitude entry of each column of U or L. Also */
  140. /* permute the magnitudes of A above so they're in the same order as */
  141. /* the factor. */
  142. /* The iteration orders and permutations were copied from dsytrs. */
  143. /* Calls to SSWAP would be severe overkill. */
  144. if (upper) {
  145. k = *n;
  146. while(k < ncols && k > 0) {
  147. if (ipiv[k] > 0) {
  148. /* 1x1 pivot */
  149. kp = ipiv[k];
  150. if (kp != k) {
  151. tmp = work[*n + k];
  152. work[*n + k] = work[*n + kp];
  153. work[*n + kp] = tmp;
  154. }
  155. i__1 = k;
  156. for (i__ = 1; i__ <= i__1; ++i__) {
  157. /* Computing MAX */
  158. d__2 = (d__1 = af[i__ + k * af_dim1], abs(d__1)), d__3 =
  159. work[k];
  160. work[k] = max(d__2,d__3);
  161. }
  162. --k;
  163. } else {
  164. /* 2x2 pivot */
  165. kp = -ipiv[k];
  166. tmp = work[*n + k - 1];
  167. work[*n + k - 1] = work[*n + kp];
  168. work[*n + kp] = tmp;
  169. i__1 = k - 1;
  170. for (i__ = 1; i__ <= i__1; ++i__) {
  171. /* Computing MAX */
  172. d__2 = (d__1 = af[i__ + k * af_dim1], abs(d__1)), d__3 =
  173. work[k];
  174. work[k] = max(d__2,d__3);
  175. /* Computing MAX */
  176. d__2 = (d__1 = af[i__ + (k - 1) * af_dim1], abs(d__1)),
  177. d__3 = work[k - 1];
  178. work[k - 1] = max(d__2,d__3);
  179. }
  180. /* Computing MAX */
  181. d__2 = (d__1 = af[k + k * af_dim1], abs(d__1)), d__3 = work[k]
  182. ;
  183. work[k] = max(d__2,d__3);
  184. k += -2;
  185. }
  186. }
  187. k = ncols;
  188. while(k <= *n) {
  189. if (ipiv[k] > 0) {
  190. kp = ipiv[k];
  191. if (kp != k) {
  192. tmp = work[*n + k];
  193. work[*n + k] = work[*n + kp];
  194. work[*n + kp] = tmp;
  195. }
  196. ++k;
  197. } else {
  198. kp = -ipiv[k];
  199. tmp = work[*n + k];
  200. work[*n + k] = work[*n + kp];
  201. work[*n + kp] = tmp;
  202. k += 2;
  203. }
  204. }
  205. } else {
  206. k = 1;
  207. while(k <= ncols) {
  208. if (ipiv[k] > 0) {
  209. /* 1x1 pivot */
  210. kp = ipiv[k];
  211. if (kp != k) {
  212. tmp = work[*n + k];
  213. work[*n + k] = work[*n + kp];
  214. work[*n + kp] = tmp;
  215. }
  216. i__1 = *n;
  217. for (i__ = k; i__ <= i__1; ++i__) {
  218. /* Computing MAX */
  219. d__2 = (d__1 = af[i__ + k * af_dim1], abs(d__1)), d__3 =
  220. work[k];
  221. work[k] = max(d__2,d__3);
  222. }
  223. ++k;
  224. } else {
  225. /* 2x2 pivot */
  226. kp = -ipiv[k];
  227. tmp = work[*n + k + 1];
  228. work[*n + k + 1] = work[*n + kp];
  229. work[*n + kp] = tmp;
  230. i__1 = *n;
  231. for (i__ = k + 1; i__ <= i__1; ++i__) {
  232. /* Computing MAX */
  233. d__2 = (d__1 = af[i__ + k * af_dim1], abs(d__1)), d__3 =
  234. work[k];
  235. work[k] = max(d__2,d__3);
  236. /* Computing MAX */
  237. d__2 = (d__1 = af[i__ + (k + 1) * af_dim1], abs(d__1)),
  238. d__3 = work[k + 1];
  239. work[k + 1] = max(d__2,d__3);
  240. }
  241. /* Computing MAX */
  242. d__2 = (d__1 = af[k + k * af_dim1], abs(d__1)), d__3 = work[k]
  243. ;
  244. work[k] = max(d__2,d__3);
  245. k += 2;
  246. }
  247. }
  248. k = ncols;
  249. while(k >= 1) {
  250. if (ipiv[k] > 0) {
  251. kp = ipiv[k];
  252. if (kp != k) {
  253. tmp = work[*n + k];
  254. work[*n + k] = work[*n + kp];
  255. work[*n + kp] = tmp;
  256. }
  257. --k;
  258. } else {
  259. kp = -ipiv[k];
  260. tmp = work[*n + k];
  261. work[*n + k] = work[*n + kp];
  262. work[*n + kp] = tmp;
  263. k += -2;
  264. }
  265. }
  266. }
  267. /* Compute the *inverse* of the max element growth factor. Dividing */
  268. /* by zero would imply the largest entry of the factor's column is */
  269. /* zero. Than can happen when either the column of A is zero or */
  270. /* massive pivots made the factor underflow to zero. Neither counts */
  271. /* as growth in itself, so simply ignore terms with zero */
  272. /* denominators. */
  273. if (upper) {
  274. i__1 = *n;
  275. for (i__ = ncols; i__ <= i__1; ++i__) {
  276. umax = work[i__];
  277. amax = work[*n + i__];
  278. if (umax != 0.) {
  279. /* Computing MIN */
  280. d__1 = amax / umax;
  281. rpvgrw = min(d__1,rpvgrw);
  282. }
  283. }
  284. } else {
  285. i__1 = ncols;
  286. for (i__ = 1; i__ <= i__1; ++i__) {
  287. umax = work[i__];
  288. amax = work[*n + i__];
  289. if (umax != 0.) {
  290. /* Computing MIN */
  291. d__1 = amax / umax;
  292. rpvgrw = min(d__1,rpvgrw);
  293. }
  294. }
  295. }
  296. ret_val = rpvgrw;
  297. return ret_val;
  298. } /* _starpu_dla_syrpvgrw__ */