dlaeda.c 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. /* dlaeda.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows systems, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Table of constant values: the routines called below take every argument by address, so the literal values they need are held in these file-scope statics. */
  14. static integer c__2 = 2; /* base handed to pow_ii() to form powers of two */
  15. static integer c__1 = 1; /* element count of 1 for the drot calls and stride of 1 for the drot, dcopy and dgemv calls */
  16. static doublereal c_b24 = 1.; /* 4th argument of the dgemv calls (the alpha scalar in BLAS DGEMV argument order) */
  17. static doublereal c_b26 = 0.; /* 9th argument of the dgemv calls (the beta scalar in BLAS DGEMV argument order) */
  18. /* Subroutine */ int _starpu_dlaeda_(integer *n, integer *tlvls, integer *curlvl,
  19. integer *curpbm, integer *prmptr, integer *perm, integer *givptr,
  20. integer *givcol, doublereal *givnum, doublereal *q, integer *qptr,
  21. doublereal *z__, doublereal *ztemp, integer *info)
  22. {
  23. /* System generated locals */
  24. integer i__1, i__2, i__3;
  25. /* Builtin functions */
  26. integer pow_ii(integer *, integer *);
  27. double sqrt(doublereal);
  28. /* Local variables */
  29. integer i__, k, mid, ptr;
  30. extern /* Subroutine */ int _starpu_drot_(integer *, doublereal *, integer *,
  31. doublereal *, integer *, doublereal *, doublereal *);
  32. integer curr, bsiz1, bsiz2, psiz1, psiz2, zptr1;
  33. extern /* Subroutine */ int _starpu_dgemv_(char *, integer *, integer *,
  34. doublereal *, doublereal *, integer *, doublereal *, integer *,
  35. doublereal *, doublereal *, integer *), _starpu_dcopy_(integer *,
  36. doublereal *, integer *, doublereal *, integer *), _starpu_xerbla_(char *,
  37. integer *);
  38. /* -- LAPACK routine (version 3.2) -- */
  39. /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  40. /* November 2006 */
  41. /* .. Scalar Arguments .. */
  42. /* .. */
  43. /* .. Array Arguments .. */
  44. /* .. */
  45. /* Purpose */
  46. /* ======= */
  47. /* DLAEDA computes the Z vector corresponding to the merge step in the */
  48. /* CURLVLth step of the merge process with TLVLS steps for the CURPBMth */
  49. /* problem. */
  50. /* Arguments */
  51. /* ========= */
  52. /* N (input) INTEGER */
  53. /* The dimension of the symmetric tridiagonal matrix. N >= 0. */
  54. /* TLVLS (input) INTEGER */
  55. /* The total number of merging levels in the overall divide and */
  56. /* conquer tree. */
  57. /* CURLVL (input) INTEGER */
  58. /* The current level in the overall merge routine, */
  59. /* 0 <= curlvl <= tlvls. */
  60. /* CURPBM (input) INTEGER */
  61. /* The current problem in the current level in the overall */
  62. /* merge routine (counting from upper left to lower right). */
  63. /* PRMPTR (input) INTEGER array, dimension (N lg N) */
  64. /* Contains a list of pointers which indicate where in PERM a */
  65. /* level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) */
  66. /* indicates the size of the permutation and incidentally the */
  67. /* size of the full, non-deflated problem. */
  68. /* PERM (input) INTEGER array, dimension (N lg N) */
  69. /* Contains the permutations (from deflation and sorting) to be */
  70. /* applied to each eigenblock. */
  71. /* GIVPTR (input) INTEGER array, dimension (N lg N) */
  72. /* Contains a list of pointers which indicate where in GIVCOL a */
  73. /* level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) */
  74. /* indicates the number of Givens rotations. */
  75. /* GIVCOL (input) INTEGER array, dimension (2, N lg N) */
  76. /* Each pair of numbers indicates a pair of columns to take place */
  77. /* in a Givens rotation. */
  78. /* GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) */
  79. /* Each number indicates the S value to be used in the */
  80. /* corresponding Givens rotation. */
  81. /* Q (input) DOUBLE PRECISION array, dimension (N**2) */
  82. /* Contains the square eigenblocks from previous levels, the */
  83. /* starting positions for blocks are given by QPTR. */
  84. /* QPTR (input) INTEGER array, dimension (N+2) */
  85. /* Contains a list of pointers which indicate where in Q an */
  86. /* eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates */
  87. /* the size of the block. */
  88. /* Z (output) DOUBLE PRECISION array, dimension (N) */
  89. /* On output this vector contains the updating vector (the last */
  90. /* row of the first sub-eigenvector matrix and the first row of */
  91. /* the second sub-eigenvector matrix). */
  92. /* ZTEMP (workspace) DOUBLE PRECISION array, dimension (N) */
  93. /* INFO (output) INTEGER */
  94. /* = 0: successful exit. */
  95. /* < 0: if INFO = -i, the i-th argument had an illegal value. */
  96. /* Further Details */
  97. /* =============== */
  98. /* Based on contributions by */
  99. /* Jeff Rutter, Computer Science Division, University of California */
  100. /* at Berkeley, USA */
  101. /* ===================================================================== */
  102. /* .. Parameters .. */
  103. /* .. */
  104. /* .. Local Scalars .. */
  105. /* .. */
  106. /* .. External Subroutines .. */
  107. /* .. */
  108. /* .. Intrinsic Functions .. */
  109. /* .. */
  110. /* .. Executable Statements .. */
  111. /* Test the input parameters. */
  112. /* Parameter adjustments */
  113. --ztemp;
  114. --z__;
  115. --qptr;
  116. --q;
  117. givnum -= 3;
  118. givcol -= 3;
  119. --givptr;
  120. --perm;
  121. --prmptr;
  122. /* Function Body */
  123. *info = 0;
  124. if (*n < 0) {
  125. *info = -1;
  126. }
  127. if (*info != 0) {
  128. i__1 = -(*info);
  129. _starpu_xerbla_("DLAEDA", &i__1);
  130. return 0;
  131. }
  132. /* Quick return if possible */
  133. if (*n == 0) {
  134. return 0;
  135. }
  136. /* Determine location of first number in second half. */
  137. mid = *n / 2 + 1;
  138. /* Gather last/first rows of appropriate eigenblocks into center of Z */
  139. ptr = 1;
  140. /* Determine location of lowest level subproblem in the full storage */
  141. /* scheme */
  142. i__1 = *curlvl - 1;
  143. curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1;
  144. /* Determine size of these matrices. We add HALF to the value of */
  145. /* the SQRT in case the machine underestimates one of these square */
  146. /* roots. */
  147. bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) + .5);
  148. bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1])) +
  149. .5);
  150. i__1 = mid - bsiz1 - 1;
  151. for (k = 1; k <= i__1; ++k) {
  152. z__[k] = 0.;
  153. /* L10: */
  154. }
  155. _starpu_dcopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], &
  156. c__1);
  157. _starpu_dcopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1);
  158. i__1 = *n;
  159. for (k = mid + bsiz2; k <= i__1; ++k) {
  160. z__[k] = 0.;
  161. /* L20: */
  162. }
  163. /* Loop thru remaining levels 1 -> CURLVL applying the Givens */
  164. /* rotations and permutation and then multiplying the center matrices */
  165. /* against the current Z. */
  166. ptr = pow_ii(&c__2, tlvls) + 1;
  167. i__1 = *curlvl - 1;
  168. for (k = 1; k <= i__1; ++k) {
  169. i__2 = *curlvl - k;
  170. i__3 = *curlvl - k - 1;
  171. curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) -
  172. 1;
  173. psiz1 = prmptr[curr + 1] - prmptr[curr];
  174. psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
  175. zptr1 = mid - psiz1;
  176. /* Apply Givens at CURR and CURR+1 */
  177. i__2 = givptr[curr + 1] - 1;
  178. for (i__ = givptr[curr]; i__ <= i__2; ++i__) {
  179. _starpu_drot_(&c__1, &z__[zptr1 + givcol[(i__ << 1) + 1] - 1], &c__1, &
  180. z__[zptr1 + givcol[(i__ << 1) + 2] - 1], &c__1, &givnum[(
  181. i__ << 1) + 1], &givnum[(i__ << 1) + 2]);
  182. /* L30: */
  183. }
  184. i__2 = givptr[curr + 2] - 1;
  185. for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) {
  186. _starpu_drot_(&c__1, &z__[mid - 1 + givcol[(i__ << 1) + 1]], &c__1, &z__[
  187. mid - 1 + givcol[(i__ << 1) + 2]], &c__1, &givnum[(i__ <<
  188. 1) + 1], &givnum[(i__ << 1) + 2]);
  189. /* L40: */
  190. }
  191. psiz1 = prmptr[curr + 1] - prmptr[curr];
  192. psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
  193. i__2 = psiz1 - 1;
  194. for (i__ = 0; i__ <= i__2; ++i__) {
  195. ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1];
  196. /* L50: */
  197. }
  198. i__2 = psiz2 - 1;
  199. for (i__ = 0; i__ <= i__2; ++i__) {
  200. ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] + i__] -
  201. 1];
  202. /* L60: */
  203. }
  204. /* Multiply Blocks at CURR and CURR+1 */
  205. /* Determine size of these matrices. We add HALF to the value of */
  206. /* the SQRT in case the machine underestimates one of these */
  207. /* square roots. */
  208. bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) +
  209. .5);
  210. bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1])
  211. ) + .5);
  212. if (bsiz1 > 0) {
  213. _starpu_dgemv_("T", &bsiz1, &bsiz1, &c_b24, &q[qptr[curr]], &bsiz1, &
  214. ztemp[1], &c__1, &c_b26, &z__[zptr1], &c__1);
  215. }
  216. i__2 = psiz1 - bsiz1;
  217. _starpu_dcopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1);
  218. if (bsiz2 > 0) {
  219. _starpu_dgemv_("T", &bsiz2, &bsiz2, &c_b24, &q[qptr[curr + 1]], &bsiz2, &
  220. ztemp[psiz1 + 1], &c__1, &c_b26, &z__[mid], &c__1);
  221. }
  222. i__2 = psiz2 - bsiz2;
  223. _starpu_dcopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], &
  224. c__1);
  225. i__2 = *tlvls - k;
  226. ptr += pow_ii(&c__2, &i__2);
  227. /* L70: */
  228. }
  229. return 0;
  230. /* End of DLAEDA */
  231. } /* _starpu_dlaeda_ */