dlaqr04.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479
  1. // Copyright ©2016 The Gonum Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package gonum
  5. import (
  6. "math"
  7. "gonum.org/v1/gonum/blas"
  8. )
  9. // Dlaqr04 computes the eigenvalues of a block of an n×n upper Hessenberg matrix
  10. // H, and optionally the matrices T and Z from the Schur decomposition
  11. // H = Z T Zᵀ
  12. // where T is an upper quasi-triangular matrix (the Schur form), and Z is the
  13. // orthogonal matrix of Schur vectors.
  14. //
  15. // wantt indicates whether the full Schur form T is required. If wantt is false,
  16. // then only enough of H will be updated to preserve the eigenvalues.
  17. //
  18. // wantz indicates whether the n×n matrix of Schur vectors Z is required. If it
  19. // is true, the orthogonal similarity transformation will be accumulated into
  20. // Z[iloz:ihiz+1,ilo:ihi+1], otherwise Z will not be referenced.
  21. //
  22. // ilo and ihi determine the block of H on which Dlaqr04 operates. It must hold that
  23. // 0 <= ilo <= ihi < n if n > 0,
  24. // ilo == 0 and ihi == -1 if n == 0,
  25. // and the block must be isolated, that is,
  26. // ilo == 0 or H[ilo,ilo-1] == 0,
  27. // ihi == n-1 or H[ihi+1,ihi] == 0,
  28. // otherwise Dlaqr04 will panic.
  29. //
  30. // wr and wi must have length ihi+1.
  31. //
  32. // iloz and ihiz specify the rows of Z to which transformations will be applied
  33. // if wantz is true. It must hold that
  34. // 0 <= iloz <= ilo, and ihi <= ihiz < n,
  35. // otherwise Dlaqr04 will panic.
  36. //
  37. // work must have length at least lwork and lwork must be
  38. // lwork >= 1 if n <= 11,
  39. // lwork >= n if n > 11,
  40. // otherwise Dlaqr04 will panic. lwork as large as 6*n may be required for
  41. // optimal performance. On return, work[0] will contain the optimal value of
  42. // lwork.
  43. //
  44. // If lwork is -1, instead of performing Dlaqr04, the function only estimates the
  45. // optimal workspace size and stores it into work[0]. Neither h nor z are
  46. // accessed.
  47. //
  48. // recur is the non-negative recursion depth. For recur > 0, Dlaqr04 behaves
  49. // as DLAQR0, for recur == 0 it behaves as DLAQR4.
  50. //
  51. // unconverged indicates whether Dlaqr04 computed all the eigenvalues of H[ilo:ihi+1,ilo:ihi+1].
  52. //
  53. // If unconverged is zero and wantt is true, H will contain on return the upper
  54. // quasi-triangular matrix T from the Schur decomposition. 2×2 diagonal blocks
  55. // (corresponding to complex conjugate pairs of eigenvalues) will be returned in
  56. // standard form, with H[i,i] == H[i+1,i+1] and H[i+1,i]*H[i,i+1] < 0.
  57. //
  58. // If unconverged is zero and if wantt is false, the contents of h on return is
  59. // unspecified.
  60. //
  61. // If unconverged is zero, all the eigenvalues have been computed and their real
  62. // and imaginary parts will be stored on return in wr[ilo:ihi+1] and
  63. // wi[ilo:ihi+1], respectively. If two eigenvalues are computed as a complex
  64. // conjugate pair, they are stored in consecutive elements of wr and wi, say the
  65. // i-th and (i+1)th, with wi[i] > 0 and wi[i+1] < 0. If wantt is true, then the
  66. // eigenvalues are stored in the same order as on the diagonal of the Schur form
  67. // returned in H, with wr[i] = H[i,i] and, if H[i:i+2,i:i+2] is a 2×2 diagonal
  68. // block, wi[i] = sqrt(-H[i+1,i]*H[i,i+1]) and wi[i+1] = -wi[i].
  69. //
  70. // If unconverged is positive, some eigenvalues have not converged, and
  71. // wr[unconverged:ihi+1] and wi[unconverged:ihi+1] will contain those
  72. // eigenvalues which have been successfully computed. Failures are rare.
  73. //
  74. // If unconverged is positive and wantt is true, then on return
  75. // (initial H)*U = U*(final H), (*)
  76. // where U is an orthogonal matrix. The final H is upper Hessenberg and
  77. // H[unconverged:ihi+1,unconverged:ihi+1] is upper quasi-triangular.
  78. //
  79. // If unconverged is positive and wantt is false, on return the remaining
  80. // unconverged eigenvalues are the eigenvalues of the upper Hessenberg matrix
  81. // H[ilo:unconverged,ilo:unconverged].
  82. //
  83. // If unconverged is positive and wantz is true, then on return
  84. // (final Z) = (initial Z)*U,
  85. // where U is the orthogonal matrix in (*) regardless of the value of wantt.
  86. //
  87. // References:
  88. // [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part I:
  89. // Maintaining Well-Focused Shifts and Level 3 Performance. SIAM J. Matrix
  90. // Anal. Appl. 23(4) (2002), pp. 929—947
  91. // URL: http://dx.doi.org/10.1137/S0895479801384573
  92. // [2] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II:
  93. // Aggressive Early Deflation. SIAM J. Matrix Anal. Appl. 23(4) (2002), pp. 948—973
  94. // URL: http://dx.doi.org/10.1137/S0895479801384585
  95. //
  96. // Dlaqr04 is an internal routine. It is exported for testing purposes.
  97. func (impl Implementation) Dlaqr04(wantt, wantz bool, n, ilo, ihi int, h []float64, ldh int, wr, wi []float64, iloz, ihiz int, z []float64, ldz int, work []float64, lwork int, recur int) (unconverged int) {
  98. const (
  99. // Matrices of order ntiny or smaller must be processed by
  100. // Dlahqr because of insufficient subdiagonal scratch space.
  101. // This is a hard limit.
  102. ntiny = 11
  103. // Exceptional deflation windows: try to cure rare slow
  104. // convergence by varying the size of the deflation window after
  105. // kexnw iterations.
  106. kexnw = 5
  107. // Exceptional shifts: try to cure rare slow convergence with
  108. // ad-hoc exceptional shifts every kexsh iterations.
  109. kexsh = 6
  110. // See https://github.com/gonum/lapack/pull/151#discussion_r68162802
  111. // and the surrounding discussion for an explanation where these
  112. // constants come from.
  113. // TODO(vladimir-ch): Similar constants for exceptional shifts
  114. // are used also in dlahqr.go. The first constant is different
  115. // there, it is equal to 3. Why? And does it matter?
  116. wilk1 = 0.75
  117. wilk2 = -0.4375
  118. )
  119. switch {
  120. case n < 0:
  121. panic(nLT0)
  122. case ilo < 0 || max(0, n-1) < ilo:
  123. panic(badIlo)
  124. case ihi < min(ilo, n-1) || n <= ihi:
  125. panic(badIhi)
  126. case ldh < max(1, n):
  127. panic(badLdH)
  128. case wantz && (iloz < 0 || ilo < iloz):
  129. panic(badIloz)
  130. case wantz && (ihiz < ihi || n <= ihiz):
  131. panic(badIhiz)
  132. case ldz < 1, wantz && ldz < n:
  133. panic(badLdZ)
  134. case lwork < 1 && lwork != -1:
  135. panic(badLWork)
  136. // TODO(vladimir-ch): Enable if and when we figure out what the minimum
  137. // necessary lwork value is. Dlaqr04 says that the minimum is n which
  138. // clashes with Dlaqr23's opinion about optimal work when nw <= 2
  139. // (independent of n).
  140. // case lwork < n && n > ntiny && lwork != -1:
  141. // panic(badLWork)
  142. case len(work) < max(1, lwork):
  143. panic(shortWork)
  144. case recur < 0:
  145. panic(recurLT0)
  146. }
  147. // Quick return.
  148. if n == 0 {
  149. work[0] = 1
  150. return 0
  151. }
  152. if lwork != -1 {
  153. switch {
  154. case len(h) < (n-1)*ldh+n:
  155. panic(shortH)
  156. case len(wr) != ihi+1:
  157. panic(badLenWr)
  158. case len(wi) != ihi+1:
  159. panic(badLenWi)
  160. case wantz && len(z) < (n-1)*ldz+n:
  161. panic(shortZ)
  162. case ilo > 0 && h[ilo*ldh+ilo-1] != 0:
  163. panic(notIsolated)
  164. case ihi+1 < n && h[(ihi+1)*ldh+ihi] != 0:
  165. panic(notIsolated)
  166. }
  167. }
  168. if n <= ntiny {
  169. // Tiny matrices must use Dlahqr.
  170. if lwork == -1 {
  171. work[0] = 1
  172. return 0
  173. }
  174. return impl.Dlahqr(wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, iloz, ihiz, z, ldz)
  175. }
  176. // Use small bulge multi-shift QR with aggressive early deflation on
  177. // larger-than-tiny matrices.
  178. var jbcmpz string
  179. if wantt {
  180. jbcmpz = "S"
  181. } else {
  182. jbcmpz = "E"
  183. }
  184. if wantz {
  185. jbcmpz += "V"
  186. } else {
  187. jbcmpz += "N"
  188. }
  189. var fname string
  190. if recur > 0 {
  191. fname = "DLAQR0"
  192. } else {
  193. fname = "DLAQR4"
  194. }
  195. // nwr is the recommended deflation window size. n is greater than 11,
  196. // so there is enough subdiagonal workspace for nwr >= 2 as required.
  197. // (In fact, there is enough subdiagonal space for nwr >= 3.)
  198. // TODO(vladimir-ch): If there is enough space for nwr >= 3, should we
  199. // use it?
  200. nwr := impl.Ilaenv(13, fname, jbcmpz, n, ilo, ihi, lwork)
  201. nwr = max(2, nwr)
  202. nwr = min(ihi-ilo+1, min((n-1)/3, nwr))
  203. // nsr is the recommended number of simultaneous shifts. n is greater
  204. // than 11, so there is enough subdiagonal workspace for nsr to be even
  205. // and greater than or equal to two as required.
  206. nsr := impl.Ilaenv(15, fname, jbcmpz, n, ilo, ihi, lwork)
  207. nsr = min(nsr, min((n+6)/9, ihi-ilo))
  208. nsr = max(2, nsr&^1)
  209. // Workspace query call to Dlaqr23.
  210. impl.Dlaqr23(wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz, ihiz, z, ldz,
  211. wr, wi, h, ldh, n, h, ldh, n, h, ldh, work, -1, recur)
  212. // Optimal workspace is max(Dlaqr5, Dlaqr23).
  213. lwkopt := max(3*nsr/2, int(work[0]))
  214. // Quick return in case of workspace query.
  215. if lwork == -1 {
  216. work[0] = float64(lwkopt)
  217. return 0
  218. }
  219. // Dlahqr/Dlaqr04 crossover point.
  220. nmin := impl.Ilaenv(12, fname, jbcmpz, n, ilo, ihi, lwork)
  221. nmin = max(ntiny, nmin)
  222. // Nibble determines when to skip a multi-shift QR sweep (Dlaqr5).
  223. nibble := impl.Ilaenv(14, fname, jbcmpz, n, ilo, ihi, lwork)
  224. nibble = max(0, nibble)
  225. // Computation mode of far-from-diagonal orthogonal updates in Dlaqr5.
  226. kacc22 := impl.Ilaenv(16, fname, jbcmpz, n, ilo, ihi, lwork)
  227. kacc22 = max(0, min(kacc22, 2))
  228. // nwmax is the largest possible deflation window for which there is
  229. // sufficient workspace.
  230. nwmax := min((n-1)/3, lwork/2)
  231. nw := nwmax // Start with maximum deflation window size.
  232. // nsmax is the largest number of simultaneous shifts for which there is
  233. // sufficient workspace.
  234. nsmax := min((n+6)/9, 2*lwork/3) &^ 1
  235. ndfl := 1 // Number of iterations since last deflation.
  236. ndec := 0 // Deflation window size decrement.
  237. // Main loop.
  238. var (
  239. itmax = max(30, 2*kexsh) * max(10, (ihi-ilo+1))
  240. it = 0
  241. )
  242. for kbot := ihi; kbot >= ilo; {
  243. if it == itmax {
  244. unconverged = kbot + 1
  245. break
  246. }
  247. it++
  248. // Locate active block.
  249. ktop := ilo
  250. for k := kbot; k >= ilo+1; k-- {
  251. if h[k*ldh+k-1] == 0 {
  252. ktop = k
  253. break
  254. }
  255. }
  256. // Select deflation window size nw.
  257. //
  258. // Typical Case:
  259. // If possible and advisable, nibble the entire active block.
  260. // If not, use size min(nwr,nwmax) or min(nwr+1,nwmax)
  261. // depending upon which has the smaller corresponding
  262. // subdiagonal entry (a heuristic).
  263. //
  264. // Exceptional Case:
  265. // If there have been no deflations in kexnw or more
  266. // iterations, then vary the deflation window size. At first,
  267. // because larger windows are, in general, more powerful than
  268. // smaller ones, rapidly increase the window to the maximum
  269. // possible. Then, gradually reduce the window size.
  270. nh := kbot - ktop + 1
  271. nwupbd := min(nh, nwmax)
  272. if ndfl < kexnw {
  273. nw = min(nwupbd, nwr)
  274. } else {
  275. nw = min(nwupbd, 2*nw)
  276. }
  277. if nw < nwmax {
  278. if nw >= nh-1 {
  279. nw = nh
  280. } else {
  281. kwtop := kbot - nw + 1
  282. if math.Abs(h[kwtop*ldh+kwtop-1]) > math.Abs(h[(kwtop-1)*ldh+kwtop-2]) {
  283. nw++
  284. }
  285. }
  286. }
  287. if ndfl < kexnw {
  288. ndec = -1
  289. } else if ndec >= 0 || nw >= nwupbd {
  290. ndec++
  291. if nw-ndec < 2 {
  292. ndec = 0
  293. }
  294. nw -= ndec
  295. }
  296. // Split workspace under the subdiagonal of H into:
  297. // - an nw×nw work array V in the lower left-hand corner,
  298. // - an nw×nhv horizontal work array along the bottom edge (nhv
  299. // must be at least nw but more is better),
  300. // - an nve×nw vertical work array along the left-hand-edge
  301. // (nhv can be any positive integer but more is better).
  302. kv := n - nw
  303. kt := nw
  304. kwv := nw + 1
  305. nhv := n - kwv - kt
  306. // Aggressive early deflation.
  307. ls, ld := impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw,
  308. h, ldh, iloz, ihiz, z, ldz, wr[:kbot+1], wi[:kbot+1],
  309. h[kv*ldh:], ldh, nhv, h[kv*ldh+kt:], ldh, nhv, h[kwv*ldh:], ldh, work, lwork, recur)
  310. // Adjust kbot accounting for new deflations.
  311. kbot -= ld
  312. // ks points to the shifts.
  313. ks := kbot - ls + 1
  314. // Skip an expensive QR sweep if there is a (partly heuristic)
  315. // reason to expect that many eigenvalues will deflate without
  316. // it. Here, the QR sweep is skipped if many eigenvalues have
  317. // just been deflated or if the remaining active block is small.
  318. if ld > 0 && (100*ld > nw*nibble || kbot-ktop+1 <= min(nmin, nwmax)) {
  319. // ld is positive, note progress.
  320. ndfl = 1
  321. continue
  322. }
  323. // ns is the nominal number of simultaneous shifts. This may be
  324. // lowered (slightly) if Dlaqr23 did not provide that many
  325. // shifts.
  326. ns := min(min(nsmax, nsr), max(2, kbot-ktop)) &^ 1
  327. // If there have been no deflations in a multiple of kexsh
  328. // iterations, then try exceptional shifts. Otherwise use shifts
  329. // provided by Dlaqr23 above or from the eigenvalues of a
  330. // trailing principal submatrix.
  331. if ndfl%kexsh == 0 {
  332. ks = kbot - ns + 1
  333. for i := kbot; i > max(ks, ktop+1); i -= 2 {
  334. ss := math.Abs(h[i*ldh+i-1]) + math.Abs(h[(i-1)*ldh+i-2])
  335. aa := wilk1*ss + h[i*ldh+i]
  336. _, _, _, _, wr[i-1], wi[i-1], wr[i], wi[i], _, _ =
  337. impl.Dlanv2(aa, ss, wilk2*ss, aa)
  338. }
  339. if ks == ktop {
  340. wr[ks+1] = h[(ks+1)*ldh+ks+1]
  341. wi[ks+1] = 0
  342. wr[ks] = wr[ks+1]
  343. wi[ks] = wi[ks+1]
  344. }
  345. } else {
  346. // If we got ns/2 or fewer shifts, use Dlahqr or recur
  347. // into Dlaqr04 on a trailing principal submatrix to get
  348. // more. Since ns <= nsmax <=(n+6)/9, there is enough
  349. // space below the subdiagonal to fit an ns×ns scratch
  350. // array.
  351. if kbot-ks+1 <= ns/2 {
  352. ks = kbot - ns + 1
  353. kt = n - ns
  354. impl.Dlacpy(blas.All, ns, ns, h[ks*ldh+ks:], ldh, h[kt*ldh:], ldh)
  355. if ns > nmin && recur > 0 {
  356. ks += impl.Dlaqr04(false, false, ns, 1, ns-1, h[kt*ldh:], ldh,
  357. wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 0, work, lwork, recur-1)
  358. } else {
  359. ks += impl.Dlahqr(false, false, ns, 0, ns-1, h[kt*ldh:], ldh,
  360. wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 1)
  361. }
  362. // In case of a rare QR failure use eigenvalues
  363. // of the trailing 2×2 principal submatrix.
  364. if ks >= kbot {
  365. aa := h[(kbot-1)*ldh+kbot-1]
  366. bb := h[(kbot-1)*ldh+kbot]
  367. cc := h[kbot*ldh+kbot-1]
  368. dd := h[kbot*ldh+kbot]
  369. _, _, _, _, wr[kbot-1], wi[kbot-1], wr[kbot], wi[kbot], _, _ =
  370. impl.Dlanv2(aa, bb, cc, dd)
  371. ks = kbot - 1
  372. }
  373. }
  374. if kbot-ks+1 > ns {
  375. // Sorting the shifts helps a little. Bubble
  376. // sort keeps complex conjugate pairs together.
  377. sorted := false
  378. for k := kbot; k > ks; k-- {
  379. if sorted {
  380. break
  381. }
  382. sorted = true
  383. for i := ks; i < k; i++ {
  384. if math.Abs(wr[i])+math.Abs(wi[i]) >= math.Abs(wr[i+1])+math.Abs(wi[i+1]) {
  385. continue
  386. }
  387. sorted = false
  388. wr[i], wr[i+1] = wr[i+1], wr[i]
  389. wi[i], wi[i+1] = wi[i+1], wi[i]
  390. }
  391. }
  392. }
  393. // Shuffle shifts into pairs of real shifts and pairs of
  394. // complex conjugate shifts using the fact that complex
  395. // conjugate shifts are already adjacent to one another.
  396. // TODO(vladimir-ch): The shuffling here could probably
  397. // be removed but I'm not sure right now and it's safer
  398. // to leave it.
  399. for i := kbot; i > ks+1; i -= 2 {
  400. if wi[i] == -wi[i-1] {
  401. continue
  402. }
  403. wr[i], wr[i-1], wr[i-2] = wr[i-1], wr[i-2], wr[i]
  404. wi[i], wi[i-1], wi[i-2] = wi[i-1], wi[i-2], wi[i]
  405. }
  406. }
  407. // If there are only two shifts and both are real, then use only one.
  408. if kbot-ks+1 == 2 && wi[kbot] == 0 {
  409. if math.Abs(wr[kbot]-h[kbot*ldh+kbot]) < math.Abs(wr[kbot-1]-h[kbot*ldh+kbot]) {
  410. wr[kbot-1] = wr[kbot]
  411. } else {
  412. wr[kbot] = wr[kbot-1]
  413. }
  414. }
  415. // Use up to ns of the smallest magnitude shifts. If there
  416. // aren't ns shifts available, then use them all, possibly
  417. // dropping one to make the number of shifts even.
  418. ns = min(ns, kbot-ks+1) &^ 1
  419. ks = kbot - ns + 1
  420. // Split workspace under the subdiagonal into:
  421. // - a kdu×kdu work array U in the lower left-hand-corner,
  422. // - a kdu×nhv horizontal work array WH along the bottom edge
  423. // (nhv must be at least kdu but more is better),
  424. // - an nhv×kdu vertical work array WV along the left-hand-edge
  425. // (nhv must be at least kdu but more is better).
  426. kdu := 3*ns - 3
  427. ku := n - kdu
  428. kwh := kdu
  429. kwv = kdu + 3
  430. nhv = n - kwv - kdu
  431. // Small-bulge multi-shift QR sweep.
  432. impl.Dlaqr5(wantt, wantz, kacc22, n, ktop, kbot, ns,
  433. wr[ks:ks+ns], wi[ks:ks+ns], h, ldh, iloz, ihiz, z, ldz,
  434. work, 3, h[ku*ldh:], ldh, nhv, h[kwv*ldh:], ldh, nhv, h[ku*ldh+kwh:], ldh)
  435. // Note progress (or the lack of it).
  436. if ld > 0 {
  437. ndfl = 1
  438. } else {
  439. ndfl++
  440. }
  441. }
  442. work[0] = float64(lwkopt)
  443. return unconverged
  444. }