dtfsm.c 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977
  1. /* dtfsm.f -- translated by f2c (version 20061008).
  2. You must link the resulting object file with libf2c:
  3. on Microsoft Windows system, link with libf2c.lib;
  4. on Linux or Unix systems, link with .../path/to/libf2c.a -lm
  5. or, if you install libf2c.a in a standard place, with -lf2c -lm
  6. -- in that order, at the end of the command line, as in
  7. cc *.o -lf2c -lm
  8. Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
  9. http://www.netlib.org/f2c/libf2c.zip
  10. */
  11. #include "f2c.h"
  12. #include "blaswrap.h"
  13. /* Table of constant values */
  14. static doublereal c_b23 = -1.;
  15. static doublereal c_b27 = 1.;
  16. /* Subroutine */ int _starpu_dtfsm_(char *transr, char *side, char *uplo, char *trans,
  17. char *diag, integer *m, integer *n, doublereal *alpha, doublereal *a,
  18. doublereal *b, integer *ldb)
  19. {
  20. /* System generated locals */
  21. integer b_dim1, b_offset, i__1, i__2;
  22. /* Local variables */
  23. integer i__, j, k, m1, m2, n1, n2, info;
  24. logical normaltransr;
  25. extern /* Subroutine */ int _starpu_dgemm_(char *, char *, integer *, integer *,
  26. integer *, doublereal *, doublereal *, integer *, doublereal *,
  27. integer *, doublereal *, doublereal *, integer *);
  28. logical lside;
  29. extern logical _starpu_lsame_(char *, char *);
  30. logical lower;
  31. extern /* Subroutine */ int _starpu_dtrsm_(char *, char *, char *, char *,
  32. integer *, integer *, doublereal *, doublereal *, integer *,
  33. doublereal *, integer *), _starpu_xerbla_(
  34. char *, integer *);
  35. logical misodd, nisodd, notrans;
  36. /* -- LAPACK routine (version 3.2.1) -- */
  37. /* -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */
  38. /* -- April 2009 -- */
  39. /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
  40. /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
  41. /* .. */
  42. /* .. Scalar Arguments .. */
  43. /* .. */
  44. /* .. Array Arguments .. */
  45. /* .. */
  46. /* Purpose */
  47. /* ======= */
  48. /* Level 3 BLAS like routine for A in RFP Format. */
  49. /* DTFSM solves the matrix equation */
  50. /* op( A )*X = alpha*B or X*op( A ) = alpha*B */
  51. /* where alpha is a scalar, X and B are m by n matrices, A is a unit, or */
  52. /* non-unit, upper or lower triangular matrix and op( A ) is one of */
  53. /* op( A ) = A or op( A ) = A'. */
  54. /* A is in Rectangular Full Packed (RFP) Format. */
  55. /* The matrix X is overwritten on B. */
  56. /* Arguments */
  57. /* ========== */
  58. /* TRANSR - (input) CHARACTER */
  59. /* = 'N': The Normal Form of RFP A is stored; */
  60. /* = 'T': The Transpose Form of RFP A is stored. */
  61. /* SIDE - (input) CHARACTER */
  62. /* On entry, SIDE specifies whether op( A ) appears on the left */
  63. /* or right of X as follows: */
  64. /* SIDE = 'L' or 'l' op( A )*X = alpha*B. */
  65. /* SIDE = 'R' or 'r' X*op( A ) = alpha*B. */
  66. /* Unchanged on exit. */
  67. /* UPLO - (input) CHARACTER */
  68. /* On entry, UPLO specifies whether the RFP matrix A came from */
  69. /* an upper or lower triangular matrix as follows: */
  70. /* UPLO = 'U' or 'u' RFP A came from an upper triangular matrix */
  71. /* UPLO = 'L' or 'l' RFP A came from a lower triangular matrix */
  72. /* Unchanged on exit. */
  73. /* TRANS - (input) CHARACTER */
  74. /* On entry, TRANS specifies the form of op( A ) to be used */
  75. /* in the matrix multiplication as follows: */
  76. /* TRANS = 'N' or 'n' op( A ) = A. */
  77. /* TRANS = 'T' or 't' op( A ) = A'. */
  78. /* Unchanged on exit. */
  79. /* DIAG - (input) CHARACTER */
  80. /* On entry, DIAG specifies whether or not RFP A is unit */
  81. /* triangular as follows: */
  82. /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */
  83. /* DIAG = 'N' or 'n' A is not assumed to be unit */
  84. /* triangular. */
  85. /* Unchanged on exit. */
  86. /* M - (input) INTEGER. */
  87. /* On entry, M specifies the number of rows of B. M must be at */
  88. /* least zero. */
  89. /* Unchanged on exit. */
  90. /* N - (input) INTEGER. */
  91. /* On entry, N specifies the number of columns of B. N must be */
  92. /* at least zero. */
  93. /* Unchanged on exit. */
  94. /* ALPHA - (input) DOUBLE PRECISION. */
  95. /* On entry, ALPHA specifies the scalar alpha. When alpha is */
  96. /* zero then A is not referenced and B need not be set before */
  97. /* entry. */
  98. /* Unchanged on exit. */
  99. /* A - (input) DOUBLE PRECISION array, dimension (NT); */
  100. /* NT = N*(N+1)/2. On entry, the matrix A in RFP Format. */
  101. /* RFP Format is described by TRANSR, UPLO and N as follows: */
  102. /* If TRANSR='N' then RFP A is (0:N,0:K-1) when N is even; */
  103. /* K=N/2. RFP A is (0:N-1,0:K) when N is odd; K=N/2. If */
  104. /* TRANSR = 'T' then RFP is the transpose of RFP A as */
  105. /* defined when TRANSR = 'N'. The contents of RFP A are defined */
  106. /* by UPLO as follows: If UPLO = 'U' the RFP A contains the NT */
  107. /* elements of upper packed A either in normal or */
  108. /* transpose Format. If UPLO = 'L' the RFP A contains */
  109. /* the NT elements of lower packed A either in normal or */
  110. /* transpose Format. The LDA of RFP A is (N+1)/2 when */
  111. /* TRANSR = 'T'. When TRANSR is 'N' the LDA is N+1 when N is */
  112. /* even and is N when is odd. */
  113. /* See the Note below for more details. Unchanged on exit. */
  114. /* B - (input/ouptut) DOUBLE PRECISION array, DIMENSION (LDB,N) */
  115. /* Before entry, the leading m by n part of the array B must */
  116. /* contain the right-hand side matrix B, and on exit is */
  117. /* overwritten by the solution matrix X. */
  118. /* LDB - (input) INTEGER. */
  119. /* On entry, LDB specifies the first dimension of B as declared */
  120. /* in the calling (sub) program. LDB must be at least */
  121. /* max( 1, m ). */
  122. /* Unchanged on exit. */
  123. /* Further Details */
  124. /* =============== */
  125. /* We first consider Rectangular Full Packed (RFP) Format when N is */
  126. /* even. We give an example where N = 6. */
  127. /* AP is Upper AP is Lower */
  128. /* 00 01 02 03 04 05 00 */
  129. /* 11 12 13 14 15 10 11 */
  130. /* 22 23 24 25 20 21 22 */
  131. /* 33 34 35 30 31 32 33 */
  132. /* 44 45 40 41 42 43 44 */
  133. /* 55 50 51 52 53 54 55 */
  134. /* Let TRANSR = 'N'. RFP holds AP as follows: */
  135. /* For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */
  136. /* three columns of AP upper. The lower triangle A(4:6,0:2) consists of */
  137. /* the transpose of the first three columns of AP upper. */
  138. /* For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */
  139. /* three columns of AP lower. The upper triangle A(0:2,0:2) consists of */
  140. /* the transpose of the last three columns of AP lower. */
  141. /* This covers the case N even and TRANSR = 'N'. */
  142. /* RFP A RFP A */
  143. /* 03 04 05 33 43 53 */
  144. /* 13 14 15 00 44 54 */
  145. /* 23 24 25 10 11 55 */
  146. /* 33 34 35 20 21 22 */
  147. /* 00 44 45 30 31 32 */
  148. /* 01 11 55 40 41 42 */
  149. /* 02 12 22 50 51 52 */
  150. /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */
  151. /* transpose of RFP A above. One therefore gets: */
  152. /* RFP A RFP A */
  153. /* 03 13 23 33 00 01 02 33 00 10 20 30 40 50 */
  154. /* 04 14 24 34 44 11 12 43 44 11 21 31 41 51 */
  155. /* 05 15 25 35 45 55 22 53 54 55 22 32 42 52 */
  156. /* We first consider Rectangular Full Packed (RFP) Format when N is */
  157. /* odd. We give an example where N = 5. */
  158. /* AP is Upper AP is Lower */
  159. /* 00 01 02 03 04 00 */
  160. /* 11 12 13 14 10 11 */
  161. /* 22 23 24 20 21 22 */
  162. /* 33 34 30 31 32 33 */
  163. /* 44 40 41 42 43 44 */
  164. /* Let TRANSR = 'N'. RFP holds AP as follows: */
  165. /* For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */
  166. /* three columns of AP upper. The lower triangle A(3:4,0:1) consists of */
  167. /* the transpose of the first two columns of AP upper. */
  168. /* For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */
  169. /* three columns of AP lower. The upper triangle A(0:1,1:2) consists of */
  170. /* the transpose of the last two columns of AP lower. */
  171. /* This covers the case N odd and TRANSR = 'N'. */
  172. /* RFP A RFP A */
  173. /* 02 03 04 00 33 43 */
  174. /* 12 13 14 10 11 44 */
  175. /* 22 23 24 20 21 22 */
  176. /* 00 33 34 30 31 32 */
  177. /* 01 11 44 40 41 42 */
  178. /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */
  179. /* transpose of RFP A above. One therefore gets: */
  180. /* RFP A RFP A */
  181. /* 02 12 22 00 01 00 10 20 30 40 50 */
  182. /* 03 13 23 33 11 33 11 21 31 41 51 */
  183. /* 04 14 24 34 44 43 44 22 32 42 52 */
  184. /* Reference */
  185. /* ========= */
  186. /* ===================================================================== */
  187. /* .. */
  188. /* .. Parameters .. */
  189. /* .. */
  190. /* .. Local Scalars .. */
  191. /* .. */
  192. /* .. External Functions .. */
  193. /* .. */
  194. /* .. External Subroutines .. */
  195. /* .. */
  196. /* .. Intrinsic Functions .. */
  197. /* .. */
  198. /* .. Executable Statements .. */
  199. /* Test the input parameters. */
  200. /* Parameter adjustments */
  201. b_dim1 = *ldb - 1 - 0 + 1;
  202. b_offset = 0 + b_dim1 * 0;
  203. b -= b_offset;
  204. /* Function Body */
  205. info = 0;
  206. normaltransr = _starpu_lsame_(transr, "N");
  207. lside = _starpu_lsame_(side, "L");
  208. lower = _starpu_lsame_(uplo, "L");
  209. notrans = _starpu_lsame_(trans, "N");
  210. if (! normaltransr && ! _starpu_lsame_(transr, "T")) {
  211. info = -1;
  212. } else if (! lside && ! _starpu_lsame_(side, "R")) {
  213. info = -2;
  214. } else if (! lower && ! _starpu_lsame_(uplo, "U")) {
  215. info = -3;
  216. } else if (! notrans && ! _starpu_lsame_(trans, "T")) {
  217. info = -4;
  218. } else if (! _starpu_lsame_(diag, "N") && ! _starpu_lsame_(diag,
  219. "U")) {
  220. info = -5;
  221. } else if (*m < 0) {
  222. info = -6;
  223. } else if (*n < 0) {
  224. info = -7;
  225. } else if (*ldb < max(1,*m)) {
  226. info = -11;
  227. }
  228. if (info != 0) {
  229. i__1 = -info;
  230. _starpu_xerbla_("DTFSM ", &i__1);
  231. return 0;
  232. }
  233. /* Quick return when ( (N.EQ.0).OR.(M.EQ.0) ) */
  234. if (*m == 0 || *n == 0) {
  235. return 0;
  236. }
  237. /* Quick return when ALPHA.EQ.(0D+0) */
  238. if (*alpha == 0.) {
  239. i__1 = *n - 1;
  240. for (j = 0; j <= i__1; ++j) {
  241. i__2 = *m - 1;
  242. for (i__ = 0; i__ <= i__2; ++i__) {
  243. b[i__ + j * b_dim1] = 0.;
  244. /* L10: */
  245. }
  246. /* L20: */
  247. }
  248. return 0;
  249. }
  250. if (lside) {
  251. /* SIDE = 'L' */
  252. /* A is M-by-M. */
  253. /* If M is odd, set NISODD = .TRUE., and M1 and M2. */
  254. /* If M is even, NISODD = .FALSE., and M. */
  255. if (*m % 2 == 0) {
  256. misodd = FALSE_;
  257. k = *m / 2;
  258. } else {
  259. misodd = TRUE_;
  260. if (lower) {
  261. m2 = *m / 2;
  262. m1 = *m - m2;
  263. } else {
  264. m1 = *m / 2;
  265. m2 = *m - m1;
  266. }
  267. }
  268. if (misodd) {
  269. /* SIDE = 'L' and N is odd */
  270. if (normaltransr) {
  271. /* SIDE = 'L', N is odd, and TRANSR = 'N' */
  272. if (lower) {
  273. /* SIDE ='L', N is odd, TRANSR = 'N', and UPLO = 'L' */
  274. if (notrans) {
  275. /* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'L', and */
  276. /* TRANS = 'N' */
  277. if (*m == 1) {
  278. _starpu_dtrsm_("L", "L", "N", diag, &m1, n, alpha, a, m, &
  279. b[b_offset], ldb);
  280. } else {
  281. _starpu_dtrsm_("L", "L", "N", diag, &m1, n, alpha, a, m, &
  282. b[b_offset], ldb);
  283. _starpu_dgemm_("N", "N", &m2, n, &m1, &c_b23, &a[m1], m, &
  284. b[b_offset], ldb, alpha, &b[m1], ldb);
  285. _starpu_dtrsm_("L", "U", "T", diag, &m2, n, &c_b27, &a[*m]
  286. , m, &b[m1], ldb);
  287. }
  288. } else {
  289. /* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'L', and */
  290. /* TRANS = 'T' */
  291. if (*m == 1) {
  292. _starpu_dtrsm_("L", "L", "T", diag, &m1, n, alpha, a, m, &
  293. b[b_offset], ldb);
  294. } else {
  295. _starpu_dtrsm_("L", "U", "N", diag, &m2, n, alpha, &a[*m],
  296. m, &b[m1], ldb);
  297. _starpu_dgemm_("T", "N", &m1, n, &m2, &c_b23, &a[m1], m, &
  298. b[m1], ldb, alpha, &b[b_offset], ldb);
  299. _starpu_dtrsm_("L", "L", "T", diag, &m1, n, &c_b27, a, m,
  300. &b[b_offset], ldb);
  301. }
  302. }
  303. } else {
  304. /* SIDE ='L', N is odd, TRANSR = 'N', and UPLO = 'U' */
  305. if (! notrans) {
  306. /* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'U', and */
  307. /* TRANS = 'N' */
  308. _starpu_dtrsm_("L", "L", "N", diag, &m1, n, alpha, &a[m2], m,
  309. &b[b_offset], ldb);
  310. _starpu_dgemm_("T", "N", &m2, n, &m1, &c_b23, a, m, &b[
  311. b_offset], ldb, alpha, &b[m1], ldb);
  312. _starpu_dtrsm_("L", "U", "T", diag, &m2, n, &c_b27, &a[m1], m,
  313. &b[m1], ldb);
  314. } else {
  315. /* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'U', and */
  316. /* TRANS = 'T' */
  317. _starpu_dtrsm_("L", "U", "N", diag, &m2, n, alpha, &a[m1], m,
  318. &b[m1], ldb);
  319. _starpu_dgemm_("N", "N", &m1, n, &m2, &c_b23, a, m, &b[m1],
  320. ldb, alpha, &b[b_offset], ldb);
  321. _starpu_dtrsm_("L", "L", "T", diag, &m1, n, &c_b27, &a[m2], m,
  322. &b[b_offset], ldb);
  323. }
  324. }
  325. } else {
  326. /* SIDE = 'L', N is odd, and TRANSR = 'T' */
  327. if (lower) {
  328. /* SIDE ='L', N is odd, TRANSR = 'T', and UPLO = 'L' */
  329. if (notrans) {
  330. /* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'L', and */
  331. /* TRANS = 'N' */
  332. if (*m == 1) {
  333. _starpu_dtrsm_("L", "U", "T", diag, &m1, n, alpha, a, &m1,
  334. &b[b_offset], ldb);
  335. } else {
  336. _starpu_dtrsm_("L", "U", "T", diag, &m1, n, alpha, a, &m1,
  337. &b[b_offset], ldb);
  338. _starpu_dgemm_("T", "N", &m2, n, &m1, &c_b23, &a[m1 * m1],
  339. &m1, &b[b_offset], ldb, alpha, &b[m1],
  340. ldb);
  341. _starpu_dtrsm_("L", "L", "N", diag, &m2, n, &c_b27, &a[1],
  342. &m1, &b[m1], ldb);
  343. }
  344. } else {
  345. /* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'L', and */
  346. /* TRANS = 'T' */
  347. if (*m == 1) {
  348. _starpu_dtrsm_("L", "U", "N", diag, &m1, n, alpha, a, &m1,
  349. &b[b_offset], ldb);
  350. } else {
  351. _starpu_dtrsm_("L", "L", "T", diag, &m2, n, alpha, &a[1],
  352. &m1, &b[m1], ldb);
  353. _starpu_dgemm_("N", "N", &m1, n, &m2, &c_b23, &a[m1 * m1],
  354. &m1, &b[m1], ldb, alpha, &b[b_offset],
  355. ldb);
  356. _starpu_dtrsm_("L", "U", "N", diag, &m1, n, &c_b27, a, &
  357. m1, &b[b_offset], ldb);
  358. }
  359. }
  360. } else {
  361. /* SIDE ='L', N is odd, TRANSR = 'T', and UPLO = 'U' */
  362. if (! notrans) {
  363. /* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'U', and */
  364. /* TRANS = 'N' */
  365. _starpu_dtrsm_("L", "U", "T", diag, &m1, n, alpha, &a[m2 * m2]
  366. , &m2, &b[b_offset], ldb);
  367. _starpu_dgemm_("N", "N", &m2, n, &m1, &c_b23, a, &m2, &b[
  368. b_offset], ldb, alpha, &b[m1], ldb);
  369. _starpu_dtrsm_("L", "L", "N", diag, &m2, n, &c_b27, &a[m1 *
  370. m2], &m2, &b[m1], ldb);
  371. } else {
  372. /* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'U', and */
  373. /* TRANS = 'T' */
  374. _starpu_dtrsm_("L", "L", "T", diag, &m2, n, alpha, &a[m1 * m2]
  375. , &m2, &b[m1], ldb);
  376. _starpu_dgemm_("T", "N", &m1, n, &m2, &c_b23, a, &m2, &b[m1],
  377. ldb, alpha, &b[b_offset], ldb);
  378. _starpu_dtrsm_("L", "U", "N", diag, &m1, n, &c_b27, &a[m2 *
  379. m2], &m2, &b[b_offset], ldb);
  380. }
  381. }
  382. }
  383. } else {
  384. /* SIDE = 'L' and N is even */
  385. if (normaltransr) {
  386. /* SIDE = 'L', N is even, and TRANSR = 'N' */
  387. if (lower) {
  388. /* SIDE ='L', N is even, TRANSR = 'N', and UPLO = 'L' */
  389. if (notrans) {
  390. /* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'L', */
  391. /* and TRANS = 'N' */
  392. i__1 = *m + 1;
  393. _starpu_dtrsm_("L", "L", "N", diag, &k, n, alpha, &a[1], &
  394. i__1, &b[b_offset], ldb);
  395. i__1 = *m + 1;
  396. _starpu_dgemm_("N", "N", &k, n, &k, &c_b23, &a[k + 1], &i__1,
  397. &b[b_offset], ldb, alpha, &b[k], ldb);
  398. i__1 = *m + 1;
  399. _starpu_dtrsm_("L", "U", "T", diag, &k, n, &c_b27, a, &i__1, &
  400. b[k], ldb);
  401. } else {
  402. /* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'L', */
  403. /* and TRANS = 'T' */
  404. i__1 = *m + 1;
  405. _starpu_dtrsm_("L", "U", "N", diag, &k, n, alpha, a, &i__1, &
  406. b[k], ldb);
  407. i__1 = *m + 1;
  408. _starpu_dgemm_("T", "N", &k, n, &k, &c_b23, &a[k + 1], &i__1,
  409. &b[k], ldb, alpha, &b[b_offset], ldb);
  410. i__1 = *m + 1;
  411. _starpu_dtrsm_("L", "L", "T", diag, &k, n, &c_b27, &a[1], &
  412. i__1, &b[b_offset], ldb);
  413. }
  414. } else {
  415. /* SIDE ='L', N is even, TRANSR = 'N', and UPLO = 'U' */
  416. if (! notrans) {
  417. /* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'U', */
  418. /* and TRANS = 'N' */
  419. i__1 = *m + 1;
  420. _starpu_dtrsm_("L", "L", "N", diag, &k, n, alpha, &a[k + 1], &
  421. i__1, &b[b_offset], ldb);
  422. i__1 = *m + 1;
  423. _starpu_dgemm_("T", "N", &k, n, &k, &c_b23, a, &i__1, &b[
  424. b_offset], ldb, alpha, &b[k], ldb);
  425. i__1 = *m + 1;
  426. _starpu_dtrsm_("L", "U", "T", diag, &k, n, &c_b27, &a[k], &
  427. i__1, &b[k], ldb);
  428. } else {
  429. /* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'U', */
  430. /* and TRANS = 'T' */
  431. i__1 = *m + 1;
  432. _starpu_dtrsm_("L", "U", "N", diag, &k, n, alpha, &a[k], &
  433. i__1, &b[k], ldb);
  434. i__1 = *m + 1;
  435. _starpu_dgemm_("N", "N", &k, n, &k, &c_b23, a, &i__1, &b[k],
  436. ldb, alpha, &b[b_offset], ldb);
  437. i__1 = *m + 1;
  438. _starpu_dtrsm_("L", "L", "T", diag, &k, n, &c_b27, &a[k + 1],
  439. &i__1, &b[b_offset], ldb);
  440. }
  441. }
  442. } else {
  443. /* SIDE = 'L', N is even, and TRANSR = 'T' */
  444. if (lower) {
  445. /* SIDE ='L', N is even, TRANSR = 'T', and UPLO = 'L' */
  446. if (notrans) {
  447. /* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'L', */
  448. /* and TRANS = 'N' */
  449. _starpu_dtrsm_("L", "U", "T", diag, &k, n, alpha, &a[k], &k, &
  450. b[b_offset], ldb);
  451. _starpu_dgemm_("T", "N", &k, n, &k, &c_b23, &a[k * (k + 1)], &
  452. k, &b[b_offset], ldb, alpha, &b[k], ldb);
  453. _starpu_dtrsm_("L", "L", "N", diag, &k, n, &c_b27, a, &k, &b[
  454. k], ldb);
  455. } else {
  456. /* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'L', */
  457. /* and TRANS = 'T' */
  458. _starpu_dtrsm_("L", "L", "T", diag, &k, n, alpha, a, &k, &b[k]
  459. , ldb);
  460. _starpu_dgemm_("N", "N", &k, n, &k, &c_b23, &a[k * (k + 1)], &
  461. k, &b[k], ldb, alpha, &b[b_offset], ldb);
  462. _starpu_dtrsm_("L", "U", "N", diag, &k, n, &c_b27, &a[k], &k,
  463. &b[b_offset], ldb);
  464. }
  465. } else {
  466. /* SIDE ='L', N is even, TRANSR = 'T', and UPLO = 'U' */
  467. if (! notrans) {
  468. /* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'U', */
  469. /* and TRANS = 'N' */
  470. _starpu_dtrsm_("L", "U", "T", diag, &k, n, alpha, &a[k * (k +
  471. 1)], &k, &b[b_offset], ldb);
  472. _starpu_dgemm_("N", "N", &k, n, &k, &c_b23, a, &k, &b[
  473. b_offset], ldb, alpha, &b[k], ldb);
  474. _starpu_dtrsm_("L", "L", "N", diag, &k, n, &c_b27, &a[k * k],
  475. &k, &b[k], ldb);
  476. } else {
  477. /* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'U', */
  478. /* and TRANS = 'T' */
  479. _starpu_dtrsm_("L", "L", "T", diag, &k, n, alpha, &a[k * k], &
  480. k, &b[k], ldb);
  481. _starpu_dgemm_("T", "N", &k, n, &k, &c_b23, a, &k, &b[k], ldb,
  482. alpha, &b[b_offset], ldb);
  483. _starpu_dtrsm_("L", "U", "N", diag, &k, n, &c_b27, &a[k * (k
  484. + 1)], &k, &b[b_offset], ldb);
  485. }
  486. }
  487. }
  488. }
  489. } else {
  490. /* SIDE = 'R' */
  491. /* A is N-by-N. */
  492. /* If N is odd, set NISODD = .TRUE., and N1 and N2. */
  493. /* If N is even, NISODD = .FALSE., and K. */
  494. if (*n % 2 == 0) {
  495. nisodd = FALSE_;
  496. k = *n / 2;
  497. } else {
  498. nisodd = TRUE_;
  499. if (lower) {
  500. n2 = *n / 2;
  501. n1 = *n - n2;
  502. } else {
  503. n1 = *n / 2;
  504. n2 = *n - n1;
  505. }
  506. }
  507. if (nisodd) {
  508. /* SIDE = 'R' and N is odd */
  509. if (normaltransr) {
  510. /* SIDE = 'R', N is odd, and TRANSR = 'N' */
  511. if (lower) {
  512. /* SIDE ='R', N is odd, TRANSR = 'N', and UPLO = 'L' */
  513. if (notrans) {
  514. /* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'L', and */
  515. /* TRANS = 'N' */
  516. _starpu_dtrsm_("R", "U", "T", diag, m, &n2, alpha, &a[*n], n,
  517. &b[n1 * b_dim1], ldb);
  518. _starpu_dgemm_("N", "N", m, &n1, &n2, &c_b23, &b[n1 * b_dim1],
  519. ldb, &a[n1], n, alpha, b, ldb);
  520. _starpu_dtrsm_("R", "L", "N", diag, m, &n1, &c_b27, a, n, b,
  521. ldb);
  522. } else {
  523. /* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'L', and */
  524. /* TRANS = 'T' */
  525. _starpu_dtrsm_("R", "L", "T", diag, m, &n1, alpha, a, n, b,
  526. ldb);
  527. _starpu_dgemm_("N", "T", m, &n2, &n1, &c_b23, b, ldb, &a[n1],
  528. n, alpha, &b[n1 * b_dim1], ldb);
  529. _starpu_dtrsm_("R", "U", "N", diag, m, &n2, &c_b27, &a[*n], n,
  530. &b[n1 * b_dim1], ldb);
  531. }
  532. } else {
  533. /* SIDE ='R', N is odd, TRANSR = 'N', and UPLO = 'U' */
  534. if (notrans) {
  535. /* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'U', and */
  536. /* TRANS = 'N' */
  537. _starpu_dtrsm_("R", "L", "T", diag, m, &n1, alpha, &a[n2], n,
  538. b, ldb);
  539. _starpu_dgemm_("N", "N", m, &n2, &n1, &c_b23, b, ldb, a, n,
  540. alpha, &b[n1 * b_dim1], ldb);
  541. _starpu_dtrsm_("R", "U", "N", diag, m, &n2, &c_b27, &a[n1], n,
  542. &b[n1 * b_dim1], ldb);
  543. } else {
  544. /* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'U', and */
  545. /* TRANS = 'T' */
  546. _starpu_dtrsm_("R", "U", "T", diag, m, &n2, alpha, &a[n1], n,
  547. &b[n1 * b_dim1], ldb);
  548. _starpu_dgemm_("N", "T", m, &n1, &n2, &c_b23, &b[n1 * b_dim1],
  549. ldb, a, n, alpha, b, ldb);
  550. _starpu_dtrsm_("R", "L", "N", diag, m, &n1, &c_b27, &a[n2], n,
  551. b, ldb);
  552. }
  553. }
  554. } else {
  555. /* SIDE = 'R', N is odd, and TRANSR = 'T' */
  556. if (lower) {
  557. /* SIDE ='R', N is odd, TRANSR = 'T', and UPLO = 'L' */
  558. if (notrans) {
  559. /* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'L', and */
  560. /* TRANS = 'N' */
  561. _starpu_dtrsm_("R", "L", "N", diag, m, &n2, alpha, &a[1], &n1,
  562. &b[n1 * b_dim1], ldb);
  563. _starpu_dgemm_("N", "T", m, &n1, &n2, &c_b23, &b[n1 * b_dim1],
  564. ldb, &a[n1 * n1], &n1, alpha, b, ldb);
  565. _starpu_dtrsm_("R", "U", "T", diag, m, &n1, &c_b27, a, &n1, b,
  566. ldb);
  567. } else {
  568. /* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'L', and */
  569. /* TRANS = 'T' */
  570. _starpu_dtrsm_("R", "U", "N", diag, m, &n1, alpha, a, &n1, b,
  571. ldb);
  572. _starpu_dgemm_("N", "N", m, &n2, &n1, &c_b23, b, ldb, &a[n1 *
  573. n1], &n1, alpha, &b[n1 * b_dim1], ldb);
  574. _starpu_dtrsm_("R", "L", "T", diag, m, &n2, &c_b27, &a[1], &
  575. n1, &b[n1 * b_dim1], ldb);
  576. }
  577. } else {
  578. /* SIDE ='R', N is odd, TRANSR = 'T', and UPLO = 'U' */
  579. if (notrans) {
  580. /* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'U', and */
  581. /* TRANS = 'N' */
  582. _starpu_dtrsm_("R", "U", "N", diag, m, &n1, alpha, &a[n2 * n2]
  583. , &n2, b, ldb);
  584. _starpu_dgemm_("N", "T", m, &n2, &n1, &c_b23, b, ldb, a, &n2,
  585. alpha, &b[n1 * b_dim1], ldb);
  586. _starpu_dtrsm_("R", "L", "T", diag, m, &n2, &c_b27, &a[n1 *
  587. n2], &n2, &b[n1 * b_dim1], ldb);
  588. } else {
  589. /* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'U', and */
  590. /* TRANS = 'T' */
  591. _starpu_dtrsm_("R", "L", "N", diag, m, &n2, alpha, &a[n1 * n2]
  592. , &n2, &b[n1 * b_dim1], ldb);
  593. _starpu_dgemm_("N", "N", m, &n1, &n2, &c_b23, &b[n1 * b_dim1],
  594. ldb, a, &n2, alpha, b, ldb);
  595. _starpu_dtrsm_("R", "U", "T", diag, m, &n1, &c_b27, &a[n2 *
  596. n2], &n2, b, ldb);
  597. }
  598. }
  599. }
  600. } else {
  601. /* SIDE = 'R' and N is even */
  602. if (normaltransr) {
  603. /* SIDE = 'R', N is even, and TRANSR = 'N' */
  604. if (lower) {
  605. /* SIDE ='R', N is even, TRANSR = 'N', and UPLO = 'L' */
  606. if (notrans) {
  607. /* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'L', */
  608. /* and TRANS = 'N' */
  609. i__1 = *n + 1;
  610. _starpu_dtrsm_("R", "U", "T", diag, m, &k, alpha, a, &i__1, &
  611. b[k * b_dim1], ldb);
  612. i__1 = *n + 1;
  613. _starpu_dgemm_("N", "N", m, &k, &k, &c_b23, &b[k * b_dim1],
  614. ldb, &a[k + 1], &i__1, alpha, b, ldb);
  615. i__1 = *n + 1;
  616. _starpu_dtrsm_("R", "L", "N", diag, m, &k, &c_b27, &a[1], &
  617. i__1, b, ldb);
  618. } else {
  619. /* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'L', */
  620. /* and TRANS = 'T' */
  621. i__1 = *n + 1;
  622. _starpu_dtrsm_("R", "L", "T", diag, m, &k, alpha, &a[1], &
  623. i__1, b, ldb);
  624. i__1 = *n + 1;
  625. _starpu_dgemm_("N", "T", m, &k, &k, &c_b23, b, ldb, &a[k + 1],
  626. &i__1, alpha, &b[k * b_dim1], ldb);
  627. i__1 = *n + 1;
  628. _starpu_dtrsm_("R", "U", "N", diag, m, &k, &c_b27, a, &i__1, &
  629. b[k * b_dim1], ldb);
  630. }
  631. } else {
  632. /* SIDE ='R', N is even, TRANSR = 'N', and UPLO = 'U' */
  633. if (notrans) {
  634. /* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'U', */
  635. /* and TRANS = 'N' */
  636. i__1 = *n + 1;
  637. _starpu_dtrsm_("R", "L", "T", diag, m, &k, alpha, &a[k + 1], &
  638. i__1, b, ldb);
  639. i__1 = *n + 1;
  640. _starpu_dgemm_("N", "N", m, &k, &k, &c_b23, b, ldb, a, &i__1,
  641. alpha, &b[k * b_dim1], ldb);
  642. i__1 = *n + 1;
  643. _starpu_dtrsm_("R", "U", "N", diag, m, &k, &c_b27, &a[k], &
  644. i__1, &b[k * b_dim1], ldb);
  645. } else {
  646. /* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'U', */
  647. /* and TRANS = 'T' */
  648. i__1 = *n + 1;
  649. _starpu_dtrsm_("R", "U", "T", diag, m, &k, alpha, &a[k], &
  650. i__1, &b[k * b_dim1], ldb);
  651. i__1 = *n + 1;
  652. _starpu_dgemm_("N", "T", m, &k, &k, &c_b23, &b[k * b_dim1],
  653. ldb, a, &i__1, alpha, b, ldb);
  654. i__1 = *n + 1;
  655. _starpu_dtrsm_("R", "L", "N", diag, m, &k, &c_b27, &a[k + 1],
  656. &i__1, b, ldb);
  657. }
  658. }
  659. } else {
  660. /* SIDE = 'R', N is even, and TRANSR = 'T' */
  661. if (lower) {
  662. /* SIDE ='R', N is even, TRANSR = 'T', and UPLO = 'L' */
  663. if (notrans) {
  664. /* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'L', */
  665. /* and TRANS = 'N' */
  666. _starpu_dtrsm_("R", "L", "N", diag, m, &k, alpha, a, &k, &b[k
  667. * b_dim1], ldb);
  668. _starpu_dgemm_("N", "T", m, &k, &k, &c_b23, &b[k * b_dim1],
  669. ldb, &a[(k + 1) * k], &k, alpha, b, ldb);
  670. _starpu_dtrsm_("R", "U", "T", diag, m, &k, &c_b27, &a[k], &k,
  671. b, ldb);
  672. } else {
  673. /* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'L', */
  674. /* and TRANS = 'T' */
  675. _starpu_dtrsm_("R", "U", "N", diag, m, &k, alpha, &a[k], &k,
  676. b, ldb);
  677. _starpu_dgemm_("N", "N", m, &k, &k, &c_b23, b, ldb, &a[(k + 1)
  678. * k], &k, alpha, &b[k * b_dim1], ldb);
  679. _starpu_dtrsm_("R", "L", "T", diag, m, &k, &c_b27, a, &k, &b[
  680. k * b_dim1], ldb);
  681. }
  682. } else {
  683. /* SIDE ='R', N is even, TRANSR = 'T', and UPLO = 'U' */
  684. if (notrans) {
  685. /* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'U', */
  686. /* and TRANS = 'N' */
  687. _starpu_dtrsm_("R", "U", "N", diag, m, &k, alpha, &a[(k + 1) *
  688. k], &k, b, ldb);
  689. _starpu_dgemm_("N", "T", m, &k, &k, &c_b23, b, ldb, a, &k,
  690. alpha, &b[k * b_dim1], ldb);
  691. _starpu_dtrsm_("R", "L", "T", diag, m, &k, &c_b27, &a[k * k],
  692. &k, &b[k * b_dim1], ldb);
  693. } else {
  694. /* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'U', */
  695. /* and TRANS = 'T' */
  696. _starpu_dtrsm_("R", "L", "N", diag, m, &k, alpha, &a[k * k], &
  697. k, &b[k * b_dim1], ldb);
  698. _starpu_dgemm_("N", "N", m, &k, &k, &c_b23, &b[k * b_dim1],
  699. ldb, a, &k, alpha, b, ldb);
  700. _starpu_dtrsm_("R", "U", "T", diag, m, &k, &c_b27, &a[(k + 1)
  701. * k], &k, b, ldb);
  702. }
  703. }
  704. }
  705. }
  706. }
  707. return 0;
  708. /* End of DTFSM */
  709. } /* _starpu_dtfsm_ */