dlarzb.c 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289
  1. /* dlarzb.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Table of constant values */
  14. static integer c__1 = 1;
  15. static doublereal c_b13 = 1.;
  16. static doublereal c_b23 = -1.;
  17. /* Subroutine */ int _starpu_dlarzb_(char *side, char *trans, char *direct, char *
  18. storev, integer *m, integer *n, integer *k, integer *l, doublereal *v,
  19. integer *ldv, doublereal *t, integer *ldt, doublereal *c__, integer *
  20. ldc, doublereal *work, integer *ldwork)
  21. {
  22. /* System generated locals */
  23. integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1,
  24. work_offset, i__1, i__2;
  25. /* Local variables */
  26. integer i__, j, info;
  27. extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *,
  28. integer *, doublereal *, doublereal *, integer *, doublereal *,
  29. integer *, doublereal *, doublereal *, integer *);
  30. extern logical _starpu_lsame_(char *, char *);
  31. extern /* Subroutine */ int _starpu_dcopy_(integer *, doublereal *, integer *,
  32. doublereal *, integer *), _starpu_dtrmm_(char *, char *, char *, char *,
  33. integer *, integer *, doublereal *, doublereal *, integer *,
  34. doublereal *, integer *), _starpu_xerbla_(
  35. char *, integer *);
  36. char transt[1];
  37. /* -- LAPACK routine (version 3.2) -- */
  38. /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  39. /* November 2006 */
  40. /* .. Scalar Arguments .. */
  41. /* .. */
  42. /* .. Array Arguments .. */
  43. /* .. */
  44. /* Purpose */
  45. /* ======= */
  46. /* DLARZB applies a real block reflector H or its transpose H**T to */
  47. /* a real distributed M-by-N C from the left or the right. */
  48. /* Currently, only STOREV = 'R' and DIRECT = 'B' are supported. */
  49. /* Arguments */
  50. /* ========= */
  51. /* SIDE (input) CHARACTER*1 */
  52. /* = 'L': apply H or H' from the Left */
  53. /* = 'R': apply H or H' from the Right */
  54. /* TRANS (input) CHARACTER*1 */
  55. /* = 'N': apply H (No transpose) */
  56. /* = 'C': apply H' (Transpose) */
  57. /* DIRECT (input) CHARACTER*1 */
  58. /* Indicates how H is formed from a product of elementary */
  59. /* reflectors */
  60. /* = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) */
  61. /* = 'B': H = H(k) . . . H(2) H(1) (Backward) */
  62. /* STOREV (input) CHARACTER*1 */
  63. /* Indicates how the vectors which define the elementary */
  64. /* reflectors are stored: */
  65. /* = 'C': Columnwise (not supported yet) */
  66. /* = 'R': Rowwise */
  67. /* M (input) INTEGER */
  68. /* The number of rows of the matrix C. */
  69. /* N (input) INTEGER */
  70. /* The number of columns of the matrix C. */
  71. /* K (input) INTEGER */
  72. /* The order of the matrix T (= the number of elementary */
  73. /* reflectors whose product defines the block reflector). */
  74. /* L (input) INTEGER */
  75. /* The number of columns of the matrix V containing the */
  76. /* meaningful part of the Householder reflectors. */
  77. /* If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. */
  78. /* V (input) DOUBLE PRECISION array, dimension (LDV,NV). */
  79. /* If STOREV = 'C', NV = K; if STOREV = 'R', NV = L. */
  80. /* LDV (input) INTEGER */
  81. /* The leading dimension of the array V. */
  82. /* If STOREV = 'C', LDV >= L; if STOREV = 'R', LDV >= K. */
  83. /* T (input) DOUBLE PRECISION array, dimension (LDT,K) */
  84. /* The triangular K-by-K matrix T in the representation of the */
  85. /* block reflector. */
  86. /* LDT (input) INTEGER */
  87. /* The leading dimension of the array T. LDT >= K. */
  88. /* C (input/output) DOUBLE PRECISION array, dimension (LDC,N) */
  89. /* On entry, the M-by-N matrix C. */
  90. /* On exit, C is overwritten by H*C or H'*C or C*H or C*H'. */
  91. /* LDC (input) INTEGER */
  92. /* The leading dimension of the array C. LDC >= max(1,M). */
  93. /* WORK (workspace) DOUBLE PRECISION array, dimension (LDWORK,K) */
  94. /* LDWORK (input) INTEGER */
  95. /* The leading dimension of the array WORK. */
  96. /* If SIDE = 'L', LDWORK >= max(1,N); */
  97. /* if SIDE = 'R', LDWORK >= max(1,M). */
  98. /* Further Details */
  99. /* =============== */
  100. /* Based on contributions by */
  101. /* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */
  102. /* ===================================================================== */
  103. /* .. Parameters .. */
  104. /* .. */
  105. /* .. Local Scalars .. */
  106. /* .. */
  107. /* .. External Functions .. */
  108. /* .. */
  109. /* .. External Subroutines .. */
  110. /* .. */
  111. /* .. Executable Statements .. */
  112. /* Quick return if possible */
  113. /* Parameter adjustments */
  114. v_dim1 = *ldv;
  115. v_offset = 1 + v_dim1;
  116. v -= v_offset;
  117. t_dim1 = *ldt;
  118. t_offset = 1 + t_dim1;
  119. t -= t_offset;
  120. c_dim1 = *ldc;
  121. c_offset = 1 + c_dim1;
  122. c__ -= c_offset;
  123. work_dim1 = *ldwork;
  124. work_offset = 1 + work_dim1;
  125. work -= work_offset;
  126. /* Function Body */
  127. if (*m <= 0 || *n <= 0) {
  128. return 0;
  129. }
  130. /* Check for currently supported options */
  131. info = 0;
  132. if (! _starpu_lsame_(direct, "B")) {
  133. info = -3;
  134. } else if (! _starpu_lsame_(storev, "R")) {
  135. info = -4;
  136. }
  137. if (info != 0) {
  138. i__1 = -info;
  139. _starpu_xerbla_("DLARZB", &i__1);
  140. return 0;
  141. }
  142. if (_starpu_lsame_(trans, "N")) {
  143. *(unsigned char *)transt = 'T';
  144. } else {
  145. *(unsigned char *)transt = 'N';
  146. }
  147. if (_starpu_lsame_(side, "L")) {
  148. /* Form H * C or H' * C */
  149. /* W( 1:n, 1:k ) = C( 1:k, 1:n )' */
  150. i__1 = *k;
  151. for (j = 1; j <= i__1; ++j) {
  152. _starpu_dcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], &c__1);
  153. /* L10: */
  154. }
  155. /* W( 1:n, 1:k ) = W( 1:n, 1:k ) + ... */
  156. /* C( m-l+1:m, 1:n )' * V( 1:k, 1:l )' */
  157. if (*l > 0) {
  158. _starpu_dgemm_("Transpose", "Transpose", n, k, l, &c_b13, &c__[*m - *l +
  159. 1 + c_dim1], ldc, &v[v_offset], ldv, &c_b13, &work[
  160. work_offset], ldwork);
  161. }
  162. /* W( 1:n, 1:k ) = W( 1:n, 1:k ) * T' or W( 1:m, 1:k ) * T */
  163. _starpu_dtrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b13, &t[
  164. t_offset], ldt, &work[work_offset], ldwork);
  165. /* C( 1:k, 1:n ) = C( 1:k, 1:n ) - W( 1:n, 1:k )' */
  166. i__1 = *n;
  167. for (j = 1; j <= i__1; ++j) {
  168. i__2 = *k;
  169. for (i__ = 1; i__ <= i__2; ++i__) {
  170. c__[i__ + j * c_dim1] -= work[j + i__ * work_dim1];
  171. /* L20: */
  172. }
  173. /* L30: */
  174. }
  175. /* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... */
  176. /* V( 1:k, 1:l )' * W( 1:n, 1:k )' */
  177. if (*l > 0) {
  178. _starpu_dgemm_("Transpose", "Transpose", l, n, k, &c_b23, &v[v_offset],
  179. ldv, &work[work_offset], ldwork, &c_b13, &c__[*m - *l + 1
  180. + c_dim1], ldc);
  181. }
  182. } else if (_starpu_lsame_(side, "R")) {
  183. /* Form C * H or C * H' */
  184. /* W( 1:m, 1:k ) = C( 1:m, 1:k ) */
  185. i__1 = *k;
  186. for (j = 1; j <= i__1; ++j) {
  187. _starpu_dcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * work_dim1 + 1], &
  188. c__1);
  189. /* L40: */
  190. }
  191. /* W( 1:m, 1:k ) = W( 1:m, 1:k ) + ... */
  192. /* C( 1:m, n-l+1:n ) * V( 1:k, 1:l )' */
  193. if (*l > 0) {
  194. _starpu_dgemm_("No transpose", "Transpose", m, k, l, &c_b13, &c__[(*n - *
  195. l + 1) * c_dim1 + 1], ldc, &v[v_offset], ldv, &c_b13, &
  196. work[work_offset], ldwork);
  197. }
  198. /* W( 1:m, 1:k ) = W( 1:m, 1:k ) * T or W( 1:m, 1:k ) * T' */
  199. _starpu_dtrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b13, &t[t_offset]
  200. , ldt, &work[work_offset], ldwork);
  201. /* C( 1:m, 1:k ) = C( 1:m, 1:k ) - W( 1:m, 1:k ) */
  202. i__1 = *k;
  203. for (j = 1; j <= i__1; ++j) {
  204. i__2 = *m;
  205. for (i__ = 1; i__ <= i__2; ++i__) {
  206. c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1];
  207. /* L50: */
  208. }
  209. /* L60: */
  210. }
  211. /* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... */
  212. /* W( 1:m, 1:k ) * V( 1:k, 1:l ) */
  213. if (*l > 0) {
  214. _starpu_dgemm_("No transpose", "No transpose", m, l, k, &c_b23, &work[
  215. work_offset], ldwork, &v[v_offset], ldv, &c_b13, &c__[(*n
  216. - *l + 1) * c_dim1 + 1], ldc);
  217. }
  218. }
  219. return 0;
  220. /* End of DLARZB */
  221. } /* _starpu_dlarzb_ */