/* dtgsyl.c */

/* dtgsyl.f -- translated by f2c (version 20061008).
   You must link the resulting object file with libf2c:
	on Microsoft Windows system, link with libf2c.lib;
	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
	or, if you install libf2c.a in a standard place, with -lf2c -lm
	-- in that order, at the end of the command line, as in
		cc *.o -lf2c -lm
	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,

		http://www.netlib.org/f2c/libf2c.zip
*/
/* System headers come first so that macros defined by f2c.h cannot
   interfere with their declarations. */
#include <math.h>

#include "f2c.h"
#include "blaswrap.h"
  13. /* Table of constant values */
  14. static integer c__2 = 2;
  15. static integer c_n1 = -1;
  16. static integer c__5 = 5;
  17. static doublereal c_b14 = 0.;
  18. static integer c__1 = 1;
  19. static doublereal c_b51 = -1.;
  20. static doublereal c_b52 = 1.;
  21. /* Subroutine */ int _starpu_dtgsyl_(char *trans, integer *ijob, integer *m, integer *
  22. n, doublereal *a, integer *lda, doublereal *b, integer *ldb,
  23. doublereal *c__, integer *ldc, doublereal *d__, integer *ldd,
  24. doublereal *e, integer *lde, doublereal *f, integer *ldf, doublereal *
  25. scale, doublereal *dif, doublereal *work, integer *lwork, integer *
  26. iwork, integer *info)
  27. {
  28. /* System generated locals */
  29. integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, d_dim1,
  30. d_offset, e_dim1, e_offset, f_dim1, f_offset, i__1, i__2, i__3,
  31. i__4;
  32. /* Builtin functions */
  33. double sqrt(doublereal);
  34. /* Local variables */
  35. integer i__, j, k, p, q, ie, je, mb, nb, is, js, pq;
  36. doublereal dsum;
  37. integer ppqq;
  38. extern /* Subroutine */ int _starpu_dscal_(integer *, doublereal *, doublereal *,
  39. integer *), _starpu_dgemm_(char *, char *, integer *, integer *, integer *
  40. , doublereal *, doublereal *, integer *, doublereal *, integer *,
  41. doublereal *, doublereal *, integer *);
  42. extern logical _starpu_lsame_(char *, char *);
  43. integer ifunc, linfo, lwmin;
  44. doublereal scale2;
  45. extern /* Subroutine */ int _starpu_dtgsy2_(char *, integer *, integer *, integer
  46. *, doublereal *, integer *, doublereal *, integer *, doublereal *,
  47. integer *, doublereal *, integer *, doublereal *, integer *,
  48. doublereal *, integer *, doublereal *, doublereal *, doublereal *,
  49. integer *, integer *, integer *);
  50. doublereal dscale, scaloc;
  51. extern /* Subroutine */ int _starpu_dlacpy_(char *, integer *, integer *,
  52. doublereal *, integer *, doublereal *, integer *),
  53. _starpu_dlaset_(char *, integer *, integer *, doublereal *, doublereal *,
  54. doublereal *, integer *);
  55. extern integer _starpu_ilaenv_(integer *, char *, char *, integer *, integer *,
  56. integer *, integer *);
  57. extern /* Subroutine */ int _starpu_xerbla_(char *, integer *);
  58. integer iround;
  59. logical notran;
  60. integer isolve;
  61. logical lquery;
  62. /* -- LAPACK routine (version 3.2) -- */
  63. /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
  64. /* November 2006 */
  65. /* .. Scalar Arguments .. */
  66. /* .. */
  67. /* .. Array Arguments .. */
  68. /* .. */
  69. /* Purpose */
  70. /* ======= */
  71. /* DTGSYL solves the generalized Sylvester equation: */
  72. /* A * R - L * B = scale * C (1) */
  73. /* D * R - L * E = scale * F */
  74. /* where R and L are unknown m-by-n matrices, (A, D), (B, E) and */
  75. /* (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n, */
  76. /* respectively, with real entries. (A, D) and (B, E) must be in */
  77. /* generalized (real) Schur canonical form, i.e. A, B are upper quasi */
  78. /* triangular and D, E are upper triangular. */
  79. /* The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output */
  80. /* scaling factor chosen to avoid overflow. */
  81. /* In matrix notation (1) is equivalent to solve Zx = scale b, where */
  82. /* Z is defined as */
  83. /* Z = [ kron(In, A) -kron(B', Im) ] (2) */
  84. /* [ kron(In, D) -kron(E', Im) ]. */
  85. /* Here Ik is the identity matrix of size k and X' is the transpose of */
  86. /* X. kron(X, Y) is the Kronecker product between the matrices X and Y. */
  87. /* If TRANS = 'T', DTGSYL solves the transposed system Z'*y = scale*b, */
  88. /* which is equivalent to solve for R and L in */
  89. /* A' * R + D' * L = scale * C (3) */
  90. /* R * B' + L * E' = scale * (-F) */
  91. /* This case (TRANS = 'T') is used to compute an one-norm-based estimate */
  92. /* of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D) */
  93. /* and (B,E), using DLACON. */
  94. /* If IJOB >= 1, DTGSYL computes a Frobenius norm-based estimate */
  95. /* of Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the */
  96. /* reciprocal of the smallest singular value of Z. See [1-2] for more */
  97. /* information. */
  98. /* This is a level 3 BLAS algorithm. */
  99. /* Arguments */
  100. /* ========= */
  101. /* TRANS (input) CHARACTER*1 */
  102. /* = 'N', solve the generalized Sylvester equation (1). */
  103. /* = 'T', solve the 'transposed' system (3). */
  104. /* IJOB (input) INTEGER */
  105. /* Specifies what kind of functionality to be performed. */
  106. /* =0: solve (1) only. */
  107. /* =1: The functionality of 0 and 3. */
  108. /* =2: The functionality of 0 and 4. */
  109. /* =3: Only an estimate of Dif[(A,D), (B,E)] is computed. */
  110. /* (look ahead strategy IJOB = 1 is used). */
  111. /* =4: Only an estimate of Dif[(A,D), (B,E)] is computed. */
  112. /* ( DGECON on sub-systems is used ). */
  113. /* Not referenced if TRANS = 'T'. */
  114. /* M (input) INTEGER */
  115. /* The order of the matrices A and D, and the row dimension of */
  116. /* the matrices C, F, R and L. */
  117. /* N (input) INTEGER */
  118. /* The order of the matrices B and E, and the column dimension */
  119. /* of the matrices C, F, R and L. */
  120. /* A (input) DOUBLE PRECISION array, dimension (LDA, M) */
  121. /* The upper quasi triangular matrix A. */
  122. /* LDA (input) INTEGER */
  123. /* The leading dimension of the array A. LDA >= max(1, M). */
  124. /* B (input) DOUBLE PRECISION array, dimension (LDB, N) */
  125. /* The upper quasi triangular matrix B. */
  126. /* LDB (input) INTEGER */
  127. /* The leading dimension of the array B. LDB >= max(1, N). */
  128. /* C (input/output) DOUBLE PRECISION array, dimension (LDC, N) */
  129. /* On entry, C contains the right-hand-side of the first matrix */
  130. /* equation in (1) or (3). */
  131. /* On exit, if IJOB = 0, 1 or 2, C has been overwritten by */
  132. /* the solution R. If IJOB = 3 or 4 and TRANS = 'N', C holds R, */
  133. /* the solution achieved during the computation of the */
  134. /* Dif-estimate. */
  135. /* LDC (input) INTEGER */
  136. /* The leading dimension of the array C. LDC >= max(1, M). */
  137. /* D (input) DOUBLE PRECISION array, dimension (LDD, M) */
  138. /* The upper triangular matrix D. */
  139. /* LDD (input) INTEGER */
  140. /* The leading dimension of the array D. LDD >= max(1, M). */
  141. /* E (input) DOUBLE PRECISION array, dimension (LDE, N) */
  142. /* The upper triangular matrix E. */
  143. /* LDE (input) INTEGER */
  144. /* The leading dimension of the array E. LDE >= max(1, N). */
  145. /* F (input/output) DOUBLE PRECISION array, dimension (LDF, N) */
  146. /* On entry, F contains the right-hand-side of the second matrix */
  147. /* equation in (1) or (3). */
  148. /* On exit, if IJOB = 0, 1 or 2, F has been overwritten by */
  149. /* the solution L. If IJOB = 3 or 4 and TRANS = 'N', F holds L, */
  150. /* the solution achieved during the computation of the */
  151. /* Dif-estimate. */
  152. /* LDF (input) INTEGER */
  153. /* The leading dimension of the array F. LDF >= max(1, M). */
  154. /* DIF (output) DOUBLE PRECISION */
  155. /* On exit DIF is the reciprocal of a lower bound of the */
  156. /* reciprocal of the Dif-function, i.e. DIF is an upper bound of */
  157. /* Dif[(A,D), (B,E)] = sigma_min(Z), where Z as in (2). */
  158. /* IF IJOB = 0 or TRANS = 'T', DIF is not touched. */
  159. /* SCALE (output) DOUBLE PRECISION */
  160. /* On exit SCALE is the scaling factor in (1) or (3). */
  161. /* If 0 < SCALE < 1, C and F hold the solutions R and L, resp., */
  162. /* to a slightly perturbed system but the input matrices A, B, D */
  163. /* and E have not been changed. If SCALE = 0, C and F hold the */
  164. /* solutions R and L, respectively, to the homogeneous system */
  165. /* with C = F = 0. Normally, SCALE = 1. */
  166. /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
  167. /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
  168. /* LWORK (input) INTEGER */
  169. /* The dimension of the array WORK. LWORK > = 1. */
  170. /* If IJOB = 1 or 2 and TRANS = 'N', LWORK >= max(1,2*M*N). */
  171. /* If LWORK = -1, then a workspace query is assumed; the routine */
  172. /* only calculates the optimal size of the WORK array, returns */
  173. /* this value as the first entry of the WORK array, and no error */
  174. /* message related to LWORK is issued by XERBLA. */
  175. /* IWORK (workspace) INTEGER array, dimension (M+N+6) */
  176. /* INFO (output) INTEGER */
  177. /* =0: successful exit */
  178. /* <0: If INFO = -i, the i-th argument had an illegal value. */
  179. /* >0: (A, D) and (B, E) have common or close eigenvalues. */
  180. /* Further Details */
  181. /* =============== */
  182. /* Based on contributions by */
  183. /* Bo Kagstrom and Peter Poromaa, Department of Computing Science, */
  184. /* Umea University, S-901 87 Umea, Sweden. */
  185. /* [1] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software */
  186. /* for Solving the Generalized Sylvester Equation and Estimating the */
  187. /* Separation between Regular Matrix Pairs, Report UMINF - 93.23, */
  188. /* Department of Computing Science, Umea University, S-901 87 Umea, */
  189. /* Sweden, December 1993, Revised April 1994, Also as LAPACK Working */
  190. /* Note 75. To appear in ACM Trans. on Math. Software, Vol 22, */
  191. /* No 1, 1996. */
  192. /* [2] B. Kagstrom, A Perturbation Analysis of the Generalized Sylvester */
  193. /* Equation (AR - LB, DR - LE ) = (C, F), SIAM J. Matrix Anal. */
  194. /* Appl., 15(4):1045-1060, 1994 */
  195. /* [3] B. Kagstrom and L. Westin, Generalized Schur Methods with */
  196. /* Condition Estimators for Solving the Generalized Sylvester */
  197. /* Equation, IEEE Transactions on Automatic Control, Vol. 34, No. 7, */
  198. /* July 1989, pp 745-751. */
  199. /* ===================================================================== */
  200. /* Replaced various illegal calls to DCOPY by calls to DLASET. */
  201. /* Sven Hammarling, 1/5/02. */
  202. /* .. Parameters .. */
  203. /* .. */
  204. /* .. Local Scalars .. */
  205. /* .. */
  206. /* .. External Functions .. */
  207. /* .. */
  208. /* .. External Subroutines .. */
  209. /* .. */
  210. /* .. Intrinsic Functions .. */
  211. /* .. */
  212. /* .. Executable Statements .. */
  213. /* Decode and test input parameters */
  214. /* Parameter adjustments */
  215. a_dim1 = *lda;
  216. a_offset = 1 + a_dim1;
  217. a -= a_offset;
  218. b_dim1 = *ldb;
  219. b_offset = 1 + b_dim1;
  220. b -= b_offset;
  221. c_dim1 = *ldc;
  222. c_offset = 1 + c_dim1;
  223. c__ -= c_offset;
  224. d_dim1 = *ldd;
  225. d_offset = 1 + d_dim1;
  226. d__ -= d_offset;
  227. e_dim1 = *lde;
  228. e_offset = 1 + e_dim1;
  229. e -= e_offset;
  230. f_dim1 = *ldf;
  231. f_offset = 1 + f_dim1;
  232. f -= f_offset;
  233. --work;
  234. --iwork;
  235. /* Function Body */
  236. *info = 0;
  237. notran = _starpu_lsame_(trans, "N");
  238. lquery = *lwork == -1;
  239. if (! notran && ! _starpu_lsame_(trans, "T")) {
  240. *info = -1;
  241. } else if (notran) {
  242. if (*ijob < 0 || *ijob > 4) {
  243. *info = -2;
  244. }
  245. }
  246. if (*info == 0) {
  247. if (*m <= 0) {
  248. *info = -3;
  249. } else if (*n <= 0) {
  250. *info = -4;
  251. } else if (*lda < max(1,*m)) {
  252. *info = -6;
  253. } else if (*ldb < max(1,*n)) {
  254. *info = -8;
  255. } else if (*ldc < max(1,*m)) {
  256. *info = -10;
  257. } else if (*ldd < max(1,*m)) {
  258. *info = -12;
  259. } else if (*lde < max(1,*n)) {
  260. *info = -14;
  261. } else if (*ldf < max(1,*m)) {
  262. *info = -16;
  263. }
  264. }
  265. if (*info == 0) {
  266. if (notran) {
  267. if (*ijob == 1 || *ijob == 2) {
  268. /* Computing MAX */
  269. i__1 = 1, i__2 = (*m << 1) * *n;
  270. lwmin = max(i__1,i__2);
  271. } else {
  272. lwmin = 1;
  273. }
  274. } else {
  275. lwmin = 1;
  276. }
  277. work[1] = (doublereal) lwmin;
  278. if (*lwork < lwmin && ! lquery) {
  279. *info = -20;
  280. }
  281. }
  282. if (*info != 0) {
  283. i__1 = -(*info);
  284. _starpu_xerbla_("DTGSYL", &i__1);
  285. return 0;
  286. } else if (lquery) {
  287. return 0;
  288. }
  289. /* Quick return if possible */
  290. if (*m == 0 || *n == 0) {
  291. *scale = 1.;
  292. if (notran) {
  293. if (*ijob != 0) {
  294. *dif = 0.;
  295. }
  296. }
  297. return 0;
  298. }
  299. /* Determine optimal block sizes MB and NB */
  300. mb = _starpu_ilaenv_(&c__2, "DTGSYL", trans, m, n, &c_n1, &c_n1);
  301. nb = _starpu_ilaenv_(&c__5, "DTGSYL", trans, m, n, &c_n1, &c_n1);
  302. isolve = 1;
  303. ifunc = 0;
  304. if (notran) {
  305. if (*ijob >= 3) {
  306. ifunc = *ijob - 2;
  307. _starpu_dlaset_("F", m, n, &c_b14, &c_b14, &c__[c_offset], ldc)
  308. ;
  309. _starpu_dlaset_("F", m, n, &c_b14, &c_b14, &f[f_offset], ldf);
  310. } else if (*ijob >= 1) {
  311. isolve = 2;
  312. }
  313. }
  314. if (mb <= 1 && nb <= 1 || mb >= *m && nb >= *n) {
  315. i__1 = isolve;
  316. for (iround = 1; iround <= i__1; ++iround) {
  317. /* Use unblocked Level 2 solver */
  318. dscale = 0.;
  319. dsum = 1.;
  320. pq = 0;
  321. _starpu_dtgsy2_(trans, &ifunc, m, n, &a[a_offset], lda, &b[b_offset], ldb,
  322. &c__[c_offset], ldc, &d__[d_offset], ldd, &e[e_offset],
  323. lde, &f[f_offset], ldf, scale, &dsum, &dscale, &iwork[1],
  324. &pq, info);
  325. if (dscale != 0.) {
  326. if (*ijob == 1 || *ijob == 3) {
  327. *dif = sqrt((doublereal) ((*m << 1) * *n)) / (dscale *
  328. sqrt(dsum));
  329. } else {
  330. *dif = sqrt((doublereal) pq) / (dscale * sqrt(dsum));
  331. }
  332. }
  333. if (isolve == 2 && iround == 1) {
  334. if (notran) {
  335. ifunc = *ijob;
  336. }
  337. scale2 = *scale;
  338. _starpu_dlacpy_("F", m, n, &c__[c_offset], ldc, &work[1], m);
  339. _starpu_dlacpy_("F", m, n, &f[f_offset], ldf, &work[*m * *n + 1], m);
  340. _starpu_dlaset_("F", m, n, &c_b14, &c_b14, &c__[c_offset], ldc);
  341. _starpu_dlaset_("F", m, n, &c_b14, &c_b14, &f[f_offset], ldf);
  342. } else if (isolve == 2 && iround == 2) {
  343. _starpu_dlacpy_("F", m, n, &work[1], m, &c__[c_offset], ldc);
  344. _starpu_dlacpy_("F", m, n, &work[*m * *n + 1], m, &f[f_offset], ldf);
  345. *scale = scale2;
  346. }
  347. /* L30: */
  348. }
  349. return 0;
  350. }
  351. /* Determine block structure of A */
  352. p = 0;
  353. i__ = 1;
  354. L40:
  355. if (i__ > *m) {
  356. goto L50;
  357. }
  358. ++p;
  359. iwork[p] = i__;
  360. i__ += mb;
  361. if (i__ >= *m) {
  362. goto L50;
  363. }
  364. if (a[i__ + (i__ - 1) * a_dim1] != 0.) {
  365. ++i__;
  366. }
  367. goto L40;
  368. L50:
  369. iwork[p + 1] = *m + 1;
  370. if (iwork[p] == iwork[p + 1]) {
  371. --p;
  372. }
  373. /* Determine block structure of B */
  374. q = p + 1;
  375. j = 1;
  376. L60:
  377. if (j > *n) {
  378. goto L70;
  379. }
  380. ++q;
  381. iwork[q] = j;
  382. j += nb;
  383. if (j >= *n) {
  384. goto L70;
  385. }
  386. if (b[j + (j - 1) * b_dim1] != 0.) {
  387. ++j;
  388. }
  389. goto L60;
  390. L70:
  391. iwork[q + 1] = *n + 1;
  392. if (iwork[q] == iwork[q + 1]) {
  393. --q;
  394. }
  395. if (notran) {
  396. i__1 = isolve;
  397. for (iround = 1; iround <= i__1; ++iround) {
  398. /* Solve (I, J)-subsystem */
  399. /* A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J) */
  400. /* D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J) */
  401. /* for I = P, P - 1,..., 1; J = 1, 2,..., Q */
  402. dscale = 0.;
  403. dsum = 1.;
  404. pq = 0;
  405. *scale = 1.;
  406. i__2 = q;
  407. for (j = p + 2; j <= i__2; ++j) {
  408. js = iwork[j];
  409. je = iwork[j + 1] - 1;
  410. nb = je - js + 1;
  411. for (i__ = p; i__ >= 1; --i__) {
  412. is = iwork[i__];
  413. ie = iwork[i__ + 1] - 1;
  414. mb = ie - is + 1;
  415. ppqq = 0;
  416. _starpu_dtgsy2_(trans, &ifunc, &mb, &nb, &a[is + is * a_dim1],
  417. lda, &b[js + js * b_dim1], ldb, &c__[is + js *
  418. c_dim1], ldc, &d__[is + is * d_dim1], ldd, &e[js
  419. + js * e_dim1], lde, &f[is + js * f_dim1], ldf, &
  420. scaloc, &dsum, &dscale, &iwork[q + 2], &ppqq, &
  421. linfo);
  422. if (linfo > 0) {
  423. *info = linfo;
  424. }
  425. pq += ppqq;
  426. if (scaloc != 1.) {
  427. i__3 = js - 1;
  428. for (k = 1; k <= i__3; ++k) {
  429. _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1);
  430. _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1);
  431. /* L80: */
  432. }
  433. i__3 = je;
  434. for (k = js; k <= i__3; ++k) {
  435. i__4 = is - 1;
  436. _starpu_dscal_(&i__4, &scaloc, &c__[k * c_dim1 + 1], &
  437. c__1);
  438. i__4 = is - 1;
  439. _starpu_dscal_(&i__4, &scaloc, &f[k * f_dim1 + 1], &c__1);
  440. /* L90: */
  441. }
  442. i__3 = je;
  443. for (k = js; k <= i__3; ++k) {
  444. i__4 = *m - ie;
  445. _starpu_dscal_(&i__4, &scaloc, &c__[ie + 1 + k * c_dim1],
  446. &c__1);
  447. i__4 = *m - ie;
  448. _starpu_dscal_(&i__4, &scaloc, &f[ie + 1 + k * f_dim1], &
  449. c__1);
  450. /* L100: */
  451. }
  452. i__3 = *n;
  453. for (k = je + 1; k <= i__3; ++k) {
  454. _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1);
  455. _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1);
  456. /* L110: */
  457. }
  458. *scale *= scaloc;
  459. }
  460. /* Substitute R(I, J) and L(I, J) into remaining */
  461. /* equation. */
  462. if (i__ > 1) {
  463. i__3 = is - 1;
  464. _starpu_dgemm_("N", "N", &i__3, &nb, &mb, &c_b51, &a[is *
  465. a_dim1 + 1], lda, &c__[is + js * c_dim1], ldc,
  466. &c_b52, &c__[js * c_dim1 + 1], ldc);
  467. i__3 = is - 1;
  468. _starpu_dgemm_("N", "N", &i__3, &nb, &mb, &c_b51, &d__[is *
  469. d_dim1 + 1], ldd, &c__[is + js * c_dim1], ldc,
  470. &c_b52, &f[js * f_dim1 + 1], ldf);
  471. }
  472. if (j < q) {
  473. i__3 = *n - je;
  474. _starpu_dgemm_("N", "N", &mb, &i__3, &nb, &c_b52, &f[is + js *
  475. f_dim1], ldf, &b[js + (je + 1) * b_dim1],
  476. ldb, &c_b52, &c__[is + (je + 1) * c_dim1],
  477. ldc);
  478. i__3 = *n - je;
  479. _starpu_dgemm_("N", "N", &mb, &i__3, &nb, &c_b52, &f[is + js *
  480. f_dim1], ldf, &e[js + (je + 1) * e_dim1],
  481. lde, &c_b52, &f[is + (je + 1) * f_dim1], ldf);
  482. }
  483. /* L120: */
  484. }
  485. /* L130: */
  486. }
  487. if (dscale != 0.) {
  488. if (*ijob == 1 || *ijob == 3) {
  489. *dif = sqrt((doublereal) ((*m << 1) * *n)) / (dscale *
  490. sqrt(dsum));
  491. } else {
  492. *dif = sqrt((doublereal) pq) / (dscale * sqrt(dsum));
  493. }
  494. }
  495. if (isolve == 2 && iround == 1) {
  496. if (notran) {
  497. ifunc = *ijob;
  498. }
  499. scale2 = *scale;
  500. _starpu_dlacpy_("F", m, n, &c__[c_offset], ldc, &work[1], m);
  501. _starpu_dlacpy_("F", m, n, &f[f_offset], ldf, &work[*m * *n + 1], m);
  502. _starpu_dlaset_("F", m, n, &c_b14, &c_b14, &c__[c_offset], ldc);
  503. _starpu_dlaset_("F", m, n, &c_b14, &c_b14, &f[f_offset], ldf);
  504. } else if (isolve == 2 && iround == 2) {
  505. _starpu_dlacpy_("F", m, n, &work[1], m, &c__[c_offset], ldc);
  506. _starpu_dlacpy_("F", m, n, &work[*m * *n + 1], m, &f[f_offset], ldf);
  507. *scale = scale2;
  508. }
  509. /* L150: */
  510. }
  511. } else {
  512. /* Solve transposed (I, J)-subsystem */
  513. /* A(I, I)' * R(I, J) + D(I, I)' * L(I, J) = C(I, J) */
  514. /* R(I, J) * B(J, J)' + L(I, J) * E(J, J)' = -F(I, J) */
  515. /* for I = 1,2,..., P; J = Q, Q-1,..., 1 */
  516. *scale = 1.;
  517. i__1 = p;
  518. for (i__ = 1; i__ <= i__1; ++i__) {
  519. is = iwork[i__];
  520. ie = iwork[i__ + 1] - 1;
  521. mb = ie - is + 1;
  522. i__2 = p + 2;
  523. for (j = q; j >= i__2; --j) {
  524. js = iwork[j];
  525. je = iwork[j + 1] - 1;
  526. nb = je - js + 1;
  527. _starpu_dtgsy2_(trans, &ifunc, &mb, &nb, &a[is + is * a_dim1], lda, &
  528. b[js + js * b_dim1], ldb, &c__[is + js * c_dim1], ldc,
  529. &d__[is + is * d_dim1], ldd, &e[js + js * e_dim1],
  530. lde, &f[is + js * f_dim1], ldf, &scaloc, &dsum, &
  531. dscale, &iwork[q + 2], &ppqq, &linfo);
  532. if (linfo > 0) {
  533. *info = linfo;
  534. }
  535. if (scaloc != 1.) {
  536. i__3 = js - 1;
  537. for (k = 1; k <= i__3; ++k) {
  538. _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1);
  539. _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1);
  540. /* L160: */
  541. }
  542. i__3 = je;
  543. for (k = js; k <= i__3; ++k) {
  544. i__4 = is - 1;
  545. _starpu_dscal_(&i__4, &scaloc, &c__[k * c_dim1 + 1], &c__1);
  546. i__4 = is - 1;
  547. _starpu_dscal_(&i__4, &scaloc, &f[k * f_dim1 + 1], &c__1);
  548. /* L170: */
  549. }
  550. i__3 = je;
  551. for (k = js; k <= i__3; ++k) {
  552. i__4 = *m - ie;
  553. _starpu_dscal_(&i__4, &scaloc, &c__[ie + 1 + k * c_dim1], &
  554. c__1);
  555. i__4 = *m - ie;
  556. _starpu_dscal_(&i__4, &scaloc, &f[ie + 1 + k * f_dim1], &c__1)
  557. ;
  558. /* L180: */
  559. }
  560. i__3 = *n;
  561. for (k = je + 1; k <= i__3; ++k) {
  562. _starpu_dscal_(m, &scaloc, &c__[k * c_dim1 + 1], &c__1);
  563. _starpu_dscal_(m, &scaloc, &f[k * f_dim1 + 1], &c__1);
  564. /* L190: */
  565. }
  566. *scale *= scaloc;
  567. }
  568. /* Substitute R(I, J) and L(I, J) into remaining equation. */
  569. if (j > p + 2) {
  570. i__3 = js - 1;
  571. _starpu_dgemm_("N", "T", &mb, &i__3, &nb, &c_b52, &c__[is + js *
  572. c_dim1], ldc, &b[js * b_dim1 + 1], ldb, &c_b52, &
  573. f[is + f_dim1], ldf);
  574. i__3 = js - 1;
  575. _starpu_dgemm_("N", "T", &mb, &i__3, &nb, &c_b52, &f[is + js *
  576. f_dim1], ldf, &e[js * e_dim1 + 1], lde, &c_b52, &
  577. f[is + f_dim1], ldf);
  578. }
  579. if (i__ < p) {
  580. i__3 = *m - ie;
  581. _starpu_dgemm_("T", "N", &i__3, &nb, &mb, &c_b51, &a[is + (ie + 1)
  582. * a_dim1], lda, &c__[is + js * c_dim1], ldc, &
  583. c_b52, &c__[ie + 1 + js * c_dim1], ldc);
  584. i__3 = *m - ie;
  585. _starpu_dgemm_("T", "N", &i__3, &nb, &mb, &c_b51, &d__[is + (ie +
  586. 1) * d_dim1], ldd, &f[is + js * f_dim1], ldf, &
  587. c_b52, &c__[ie + 1 + js * c_dim1], ldc);
  588. }
  589. /* L200: */
  590. }
  591. /* L210: */
  592. }
  593. }
  594. work[1] = (doublereal) lwmin;
  595. return 0;
  596. /* End of DTGSYL */
  597. } /* _starpu_dtgsyl_ */