iparmq.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. /* iparmq.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. integer _starpu_iparmq_(integer *ispec, char *name__, char *opts, integer *n, integer
  14. *ilo, integer *ihi, integer *lwork)
  15. {
  16. /* System generated locals */
  17. integer ret_val, i__1, i__2;
  18. real r__1;
  19. /* Builtin functions */
  20. double log(doublereal);
  21. integer i_nint(real *);
  22. /* Local variables */
  23. integer nh, ns;
  24. /* -- LAPACK auxiliary routine (version 3.2) -- */
  25. /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  26. /* November 2006 */
  27. /* .. Scalar Arguments .. */
  28. /* Purpose */
  29. /* ======= */
  30. /* This program sets problem and machine dependent parameters */
  31. /* useful for xHSEQR and its subroutines. It is called whenever */
  32. /* ILAENV is called with 12 <= ISPEC <= 16 */
  33. /* Arguments */
  34. /* ========= */
  35. /* ISPEC (input) integer scalar */
  36. /* ISPEC specifies which tunable parameter IPARMQ should */
  37. /* return. */
  38. /* ISPEC=12: (INMIN) Matrices of order nmin or less */
  39. /* are sent directly to xLAHQR, the implicit */
  40. /* double shift QR algorithm. NMIN must be */
  41. /* at least 11. */
  42. /* ISPEC=13: (INWIN) Size of the deflation window. */
  43. /* This is best set greater than or equal to */
  44. /* the number of simultaneous shifts NS. */
  45. /* Larger matrices benefit from larger deflation */
  46. /* windows. */
  47. /* ISPEC=14: (INIBL) Determines when to stop nibbling and */
  48. /* invest in an (expensive) multi-shift QR sweep. */
  49. /* If the aggressive early deflation subroutine */
  50. /* finds LD converged eigenvalues from an order */
  51. /* NW deflation window and LD.GT.(NW*NIBBLE)/100, */
  52. /* then the next QR sweep is skipped and early */
  53. /* deflation is applied immediately to the */
  54. /* remaining active diagonal block. Setting */
  55. /* IPARMQ(ISPEC=14) = 0 causes TTQRE to skip a */
  56. /* multi-shift QR sweep whenever early deflation */
  57. /* finds a converged eigenvalue. Setting */
  58. /* IPARMQ(ISPEC=14) greater than or equal to 100 */
  59. /* prevents TTQRE from skipping a multi-shift */
  60. /* QR sweep. */
  61. /* ISPEC=15: (NSHFTS) The number of simultaneous shifts in */
  62. /* a multi-shift QR iteration. */
  63. /* ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the */
  64. /* following meanings. */
  65. /* 0: During the multi-shift QR sweep, */
  66. /* xLAQR5 does not accumulate reflections and */
  67. /* does not use matrix-matrix multiply to */
  68. /* update the far-from-diagonal matrix */
  69. /* entries. */
  70. /* 1: During the multi-shift QR sweep, */
  71. /* xLAQR5 and/or xLAQRaccumulates reflections and uses */
  72. /* matrix-matrix multiply to update the */
  73. /* far-from-diagonal matrix entries. */
  74. /* 2: During the multi-shift QR sweep. */
  75. /* xLAQR5 accumulates reflections and takes */
  76. /* advantage of 2-by-2 block structure during */
  77. /* matrix-matrix multiplies. */
  78. /* (If xTRMM is slower than xGEMM, then */
  79. /* IPARMQ(ISPEC=16)=1 may be more efficient than */
  80. /* IPARMQ(ISPEC=16)=2 despite the greater level of */
  81. /* arithmetic work implied by the latter choice.) */
  82. /* NAME (input) character string */
  83. /* Name of the calling subroutine */
  84. /* OPTS (input) character string */
  85. /* This is a concatenation of the string arguments to */
  86. /* TTQRE. */
  87. /* N (input) integer scalar */
  88. /* N is the order of the Hessenberg matrix H. */
  89. /* ILO (input) INTEGER */
  90. /* IHI (input) INTEGER */
  91. /* It is assumed that H is already upper triangular */
  92. /* in rows and columns 1:ILO-1 and IHI+1:N. */
  93. /* LWORK (input) integer scalar */
  94. /* The amount of workspace available. */
  95. /* Further Details */
  96. /* =============== */
  97. /* Little is known about how best to choose these parameters. */
  98. /* It is possible to use different values of the parameters */
  99. /* for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR. */
  100. /* It is probably best to choose different parameters for */
  101. /* different matrices and different parameters at different */
  102. /* times during the iteration, but this has not been */
  103. /* implemented --- yet. */
  104. /* The best choices of most of the parameters depend */
  105. /* in an ill-understood way on the relative execution */
  106. /* rate of xLAQR3 and xLAQR5 and on the nature of each */
  107. /* particular eigenvalue problem. Experiment may be the */
  108. /* only practical way to determine which choices are most */
  109. /* effective. */
  110. /* Following is a list of default values supplied by IPARMQ. */
  111. /* These defaults may be adjusted in order to attain better */
  112. /* performance in any particular computational environment. */
  113. /* IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point. */
  114. /* Default: 75. (Must be at least 11.) */
  115. /* IPARMQ(ISPEC=13) Recommended deflation window size. */
  116. /* This depends on ILO, IHI and NS, the */
  117. /* number of simultaneous shifts returned */
  118. /* by IPARMQ(ISPEC=15). The default for */
  119. /* (IHI-ILO+1).LE.500 is NS. The default */
  120. /* for (IHI-ILO+1).GT.500 is 3*NS/2. */
  121. /* IPARMQ(ISPEC=14) Nibble crossover point. Default: 14. */
  122. /* IPARMQ(ISPEC=15) Number of simultaneous shifts, NS. */
  123. /* a multi-shift QR iteration. */
  124. /* If IHI-ILO+1 is ... */
  125. /* greater than ...but less ... the */
  126. /* or equal to ... than default is */
  127. /* 0 30 NS = 2+ */
  128. /* 30 60 NS = 4+ */
  129. /* 60 150 NS = 10 */
  130. /* 150 590 NS = ** */
  131. /* 590 3000 NS = 64 */
  132. /* 3000 6000 NS = 128 */
  133. /* 6000 infinity NS = 256 */
  134. /* (+) By default matrices of this order are */
  135. /* passed to the implicit double shift routine */
  136. /* xLAHQR. See IPARMQ(ISPEC=12) above. These */
  137. /* values of NS are used only in case of a rare */
  138. /* xLAHQR failure. */
  139. /* (**) The asterisks (**) indicate an ad-hoc */
  140. /* function increasing from 10 to 64. */
  141. /* IPARMQ(ISPEC=16) Select structured matrix multiply. */
  142. /* (See ISPEC=16 above for details.) */
  143. /* Default: 3. */
  144. /* ================================================================ */
  145. /* .. Parameters .. */
  146. /* .. */
  147. /* .. Local Scalars .. */
  148. /* .. */
  149. /* .. Intrinsic Functions .. */
  150. /* .. */
  151. /* .. Executable Statements .. */
  152. if (*ispec == 15 || *ispec == 13 || *ispec == 16) {
  153. /* ==== Set the number simultaneous shifts ==== */
  154. nh = *ihi - *ilo + 1;
  155. ns = 2;
  156. if (nh >= 30) {
  157. ns = 4;
  158. }
  159. if (nh >= 60) {
  160. ns = 10;
  161. }
  162. if (nh >= 150) {
  163. /* Computing MAX */
  164. r__1 = log((real) nh) / log(2.f);
  165. i__1 = 10, i__2 = nh / i_nint(&r__1);
  166. ns = max(i__1,i__2);
  167. }
  168. if (nh >= 590) {
  169. ns = 64;
  170. }
  171. if (nh >= 3000) {
  172. ns = 128;
  173. }
  174. if (nh >= 6000) {
  175. ns = 256;
  176. }
  177. /* Computing MAX */
  178. i__1 = 2, i__2 = ns - ns % 2;
  179. ns = max(i__1,i__2);
  180. }
  181. if (*ispec == 12) {
  182. /* ===== Matrices of order smaller than NMIN get sent */
  183. /* . to xLAHQR, the classic double shift algorithm. */
  184. /* . This must be at least 11. ==== */
  185. ret_val = 75;
  186. } else if (*ispec == 14) {
  187. /* ==== INIBL: skip a multi-shift qr iteration and */
  188. /* . whenever aggressive early deflation finds */
  189. /* . at least (NIBBLE*(window size)/100) deflations. ==== */
  190. ret_val = 14;
  191. } else if (*ispec == 15) {
  192. /* ==== NSHFTS: The number of simultaneous shifts ===== */
  193. ret_val = ns;
  194. } else if (*ispec == 13) {
  195. /* ==== NW: deflation window size. ==== */
  196. if (nh <= 500) {
  197. ret_val = ns;
  198. } else {
  199. ret_val = ns * 3 / 2;
  200. }
  201. } else if (*ispec == 16) {
  202. /* ==== IACC22: Whether to accumulate reflections */
  203. /* . before updating the far-from-diagonal elements */
  204. /* . and whether to use 2-by-2 block structure while */
  205. /* . doing it. A small amount of work could be saved */
  206. /* . by making this choice dependent also upon the */
  207. /* . NH=IHI-ILO+1. */
  208. ret_val = 0;
  209. if (ns >= 14) {
  210. ret_val = 1;
  211. }
  212. if (ns >= 14) {
  213. ret_val = 2;
  214. }
  215. } else {
  216. /* ===== invalid value of ispec ===== */
  217. ret_val = -1;
  218. }
  219. /* ==== End of IPARMQ ==== */
  220. return ret_val;
  221. } /* _starpu_iparmq_ */