1 // Copyright ©2016 The Gonum Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
10 "gonum.org/v1/gonum/blas"
13 // Dlaqr04 computes the eigenvalues of a block of an n×n upper Hessenberg matrix
14 // H, and optionally the matrices T and Z from the Schur decomposition
16 // where T is an upper quasi-triangular matrix (the Schur form), and Z is the
17 // orthogonal matrix of Schur vectors.
19 // wantt indicates whether the full Schur form T is required. If wantt is false,
20 // then only enough of H will be updated to preserve the eigenvalues.
22 // wantz indicates whether the n×n matrix of Schur vectors Z is required. If it
23 // is true, the orthogonal similarity transformation will be accumulated into
24 // Z[iloz:ihiz+1,ilo:ihi+1], otherwise Z will not be referenced.
26 // ilo and ihi determine the block of H on which Dlaqr04 operates. It must hold that
27 // 0 <= ilo <= ihi < n, if n > 0,
28 // ilo == 0 and ihi == -1, if n == 0,
29 // and the block must be isolated, that is,
30 // ilo == 0 or H[ilo,ilo-1] == 0,
31 // ihi == n-1 or H[ihi+1,ihi] == 0,
32 // otherwise Dlaqr04 will panic.
34 // wr and wi must have length ihi+1.
36 // iloz and ihiz specify the rows of Z to which transformations will be applied
37 // if wantz is true. It must hold that
38 // 0 <= iloz <= ilo, and ihi <= ihiz < n,
39 // otherwise Dlaqr04 will panic.
41 // work must have length at least lwork and lwork must be
42 // lwork >= 1, if n <= 11,
43 // lwork >= n, if n > 11,
44 // otherwise Dlaqr04 will panic. lwork as large as 6*n may be required for
45 // optimal performance. On return, work[0] will contain the optimal value of
48 // If lwork is -1, instead of performing Dlaqr04, the function only estimates the
49 // optimal workspace size and stores it into work[0]. Neither h nor z are
52 // recur is the non-negative recursion depth. For recur > 0, Dlaqr04 behaves
53 // as DLAQR0, for recur == 0 it behaves as DLAQR4.
55 // unconverged indicates whether Dlaqr04 computed all the eigenvalues of H[ilo:ihi+1,ilo:ihi+1].
57 // If unconverged is zero and wantt is true, H will contain on return the upper
58 // quasi-triangular matrix T from the Schur decomposition. 2×2 diagonal blocks
59 // (corresponding to complex conjugate pairs of eigenvalues) will be returned in
60 // standard form, with H[i,i] == H[i+1,i+1] and H[i+1,i]*H[i,i+1] < 0.
62 // If unconverged is zero and if wantt is false, the contents of h on return is
65 // If unconverged is zero, all the eigenvalues have been computed and their real
66 // and imaginary parts will be stored on return in wr[ilo:ihi+1] and
67 // wi[ilo:ihi+1], respectively. If two eigenvalues are computed as a complex
68 // conjugate pair, they are stored in consecutive elements of wr and wi, say the
69 // i-th and (i+1)th, with wi[i] > 0 and wi[i+1] < 0. If wantt is true, then the
70 // eigenvalues are stored in the same order as on the diagonal of the Schur form
71 // returned in H, with wr[i] = H[i,i] and, if H[i:i+2,i:i+2] is a 2×2 diagonal
72 // block, wi[i] = sqrt(-H[i+1,i]*H[i,i+1]) and wi[i+1] = -wi[i].
74 // If unconverged is positive, some eigenvalues have not converged, and
75 // wr[unconverged:ihi+1] and wi[unconverged:ihi+1] will contain those
76 // eigenvalues which have been successfully computed. Failures are rare.
78 // If unconverged is positive and wantt is true, then on return
79 // (initial H)*U = U*(final H), (*)
80 // where U is an orthogonal matrix. The final H is upper Hessenberg and
81 // H[unconverged:ihi+1,unconverged:ihi+1] is upper quasi-triangular.
83 // If unconverged is positive and wantt is false, on return the remaining
84 // unconverged eigenvalues are the eigenvalues of the upper Hessenberg matrix
85 // H[ilo:unconverged,ilo:unconverged].
87 // If unconverged is positive and wantz is true, then on return
88 // (final Z) = (initial Z)*U,
89 // where U is the orthogonal matrix in (*) regardless of the value of wantt.
92 // [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part I:
93 // Maintaining Well-Focused Shifts and Level 3 Performance. SIAM J. Matrix
94 // Anal. Appl. 23(4) (2002), pp. 929—947
95 // URL: http://dx.doi.org/10.1137/S0895479801384573
96 // [2] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II:
97 // Aggressive Early Deflation. SIAM J. Matrix Anal. Appl. 23(4) (2002), pp. 948—973
98 // URL: http://dx.doi.org/10.1137/S0895479801384585
100 // Dlaqr04 is an internal routine. It is exported for testing purposes.
101 func (impl Implementation) Dlaqr04(wantt, wantz bool, n, ilo, ihi int, h []float64, ldh int, wr, wi []float64, iloz, ihiz int, z []float64, ldz int, work []float64, lwork int, recur int) (unconverged int) {
103 // Matrices of order ntiny or smaller must be processed by
104 // Dlahqr because of insufficient subdiagonal scratch space.
105 // This is a hard limit.
107 // Exceptional deflation windows: try to cure rare slow
108 // convergence by varying the size of the deflation window after
111 // Exceptional shifts: try to cure rare slow convergence with
112 // ad-hoc exceptional shifts every kexsh iterations.
115 // See https://github.com/gonum/lapack/pull/151#discussion_r68162802
116 // and the surrounding discussion for an explanation where these
117 // constants come from.
118 // TODO(vladimir-ch): Similar constants for exceptional shifts
119 // are used also in dlahqr.go. The first constant is different
120 // there, it is equal to 3. Why? And does it matter?
126 case ilo < 0 || max(0, n-1) < ilo:
128 case ihi < min(ilo, n-1) || n <= ihi:
130 case lwork < 1 && n <= ntiny && lwork != -1:
132 // TODO(vladimir-ch): Enable if and when we figure out what the minimum
133 // necessary lwork value is. Dlaqr04 says that the minimum is n which
134 // clashes with Dlaqr23's opinion about optimal work when nw <= 2
135 // (independent of n).
136 // case lwork < n && n > ntiny && lwork != -1:
138 case len(work) < lwork:
141 panic("lapack: recur is negative")
144 if iloz < 0 || ilo < iloz {
145 panic("lapack: invalid value of iloz")
147 if ihiz < ihi || n <= ihiz {
148 panic("lapack: invalid value of ihiz")
152 checkMatrix(n, n, h, ldh)
154 checkMatrix(n, n, z, ldz)
157 case ilo > 0 && h[ilo*ldh+ilo-1] != 0:
158 panic("lapack: block not isolated")
159 case ihi+1 < n && h[(ihi+1)*ldh+ihi] != 0:
160 panic("lapack: block not isolated")
161 case len(wr) != ihi+1:
162 panic("lapack: bad length of wr")
163 case len(wi) != ihi+1:
164 panic("lapack: bad length of wi")
175 // Tiny matrices must use Dlahqr.
180 return impl.Dlahqr(wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, iloz, ihiz, z, ldz)
183 // Use small bulge multi-shift QR with aggressive early deflation on
184 // larger-than-tiny matrices.
203 // nwr is the recommended deflation window size. n is greater than 11,
204 // so there is enough subdiagonal workspace for nwr >= 2 as required.
205 // (In fact, there is enough subdiagonal space for nwr >= 3.)
206 // TODO(vladimir-ch): If there is enough space for nwr >= 3, should we
208 nwr := impl.Ilaenv(13, fname, jbcmpz, n, ilo, ihi, lwork)
210 nwr = min(ihi-ilo+1, min((n-1)/3, nwr))
212 // nsr is the recommended number of simultaneous shifts. n is greater
213 // than 11, so there is enough subdiagonal workspace for nsr to be even
214 // and greater than or equal to two as required.
215 nsr := impl.Ilaenv(15, fname, jbcmpz, n, ilo, ihi, lwork)
216 nsr = min(nsr, min((n+6)/9, ihi-ilo))
219 // Workspace query call to Dlaqr23.
220 impl.Dlaqr23(wantt, wantz, n, ilo, ihi, nwr+1, nil, 0, iloz, ihiz, nil, 0,
221 nil, nil, nil, 0, n, nil, 0, n, nil, 0, work, -1, recur)
222 // Optimal workspace is max(Dlaqr5, Dlaqr23).
223 lwkopt := max(3*nsr/2, int(work[0]))
224 // Quick return in case of workspace query.
226 work[0] = float64(lwkopt)
230 // Dlahqr/Dlaqr04 crossover point.
231 nmin := impl.Ilaenv(12, fname, jbcmpz, n, ilo, ihi, lwork)
232 nmin = max(ntiny, nmin)
234 // Nibble determines when to skip a multi-shift QR sweep (Dlaqr5).
235 nibble := impl.Ilaenv(14, fname, jbcmpz, n, ilo, ihi, lwork)
236 nibble = max(0, nibble)
238 // Computation mode of far-from-diagonal orthogonal updates in Dlaqr5.
239 kacc22 := impl.Ilaenv(16, fname, jbcmpz, n, ilo, ihi, lwork)
240 kacc22 = max(0, min(kacc22, 2))
242 // nwmax is the largest possible deflation window for which there is
243 // sufficient workspace.
244 nwmax := min((n-1)/3, lwork/2)
245 nw := nwmax // Start with maximum deflation window size.
247 // nsmax is the largest number of simultaneous shifts for which there is
248 // sufficient workspace.
249 nsmax := min((n+6)/9, 2*lwork/3) &^ 1
251 ndfl := 1 // Number of iterations since last deflation.
252 ndec := 0 // Deflation window size decrement.
256 itmax = max(30, 2*kexsh) * max(10, (ihi-ilo+1))
259 for kbot := ihi; kbot >= ilo; {
261 unconverged = kbot + 1
266 // Locate active block.
268 for k := kbot; k >= ilo+1; k-- {
269 if h[k*ldh+k-1] == 0 {
275 // Select deflation window size nw.
278 // If possible and advisable, nibble the entire active block.
279 // If not, use size min(nwr,nwmax) or min(nwr+1,nwmax)
280 // depending upon which has the smaller corresponding
281 // subdiagonal entry (a heuristic).
284 // If there have been no deflations in kexnw or more
285 // iterations, then vary the deflation window size. At first,
286 // because larger windows are, in general, more powerful than
287 // smaller ones, rapidly increase the window to the maximum
288 // possible. Then, gradually reduce the window size.
289 nh := kbot - ktop + 1
290 nwupbd := min(nh, nwmax)
292 nw = min(nwupbd, nwr)
294 nw = min(nwupbd, 2*nw)
300 kwtop := kbot - nw + 1
301 if math.Abs(h[kwtop*ldh+kwtop-1]) > math.Abs(h[(kwtop-1)*ldh+kwtop-2]) {
308 } else if ndec >= 0 || nw >= nwupbd {
316 // Split workspace under the subdiagonal of H into:
317 // - an nw×nw work array V in the lower left-hand corner,
318 // - an nw×nhv horizontal work array along the bottom edge (nhv
319 // must be at least nw but more is better),
320 // - an nve×nw vertical work array along the left-hand-edge
321 // (nhv can be any positive integer but more is better).
326 // Aggressive early deflation.
327 ls, ld := impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw,
328 h, ldh, iloz, ihiz, z, ldz, wr[:kbot+1], wi[:kbot+1],
329 h[kv*ldh:], ldh, nhv, h[kv*ldh+kt:], ldh, nhv, h[kwv*ldh:], ldh, work, lwork, recur)
331 // Adjust kbot accounting for new deflations.
333 // ks points to the shifts.
336 // Skip an expensive QR sweep if there is a (partly heuristic)
337 // reason to expect that many eigenvalues will deflate without
338 // it. Here, the QR sweep is skipped if many eigenvalues have
339 // just been deflated or if the remaining active block is small.
340 if ld > 0 && (100*ld > nw*nibble || kbot-ktop+1 <= min(nmin, nwmax)) {
341 // ld is positive, note progress.
346 // ns is the nominal number of simultaneous shifts. This may be
347 // lowered (slightly) if Dlaqr23 did not provide that many
349 ns := min(min(nsmax, nsr), max(2, kbot-ktop)) &^ 1
351 // If there have been no deflations in a multiple of kexsh
352 // iterations, then try exceptional shifts. Otherwise use shifts
353 // provided by Dlaqr23 above or from the eigenvalues of a
354 // trailing principal submatrix.
357 for i := kbot; i > max(ks, ktop+1); i -= 2 {
358 ss := math.Abs(h[i*ldh+i-1]) + math.Abs(h[(i-1)*ldh+i-2])
359 aa := wilk1*ss + h[i*ldh+i]
360 _, _, _, _, wr[i-1], wi[i-1], wr[i], wi[i], _, _ =
361 impl.Dlanv2(aa, ss, wilk2*ss, aa)
364 wr[ks+1] = h[(ks+1)*ldh+ks+1]
370 // If we got ns/2 or fewer shifts, use Dlahqr or recur
371 // into Dlaqr04 on a trailing principal submatrix to get
372 // more. Since ns <= nsmax <=(n+6)/9, there is enough
373 // space below the subdiagonal to fit an ns×ns scratch
375 if kbot-ks+1 <= ns/2 {
378 impl.Dlacpy(blas.All, ns, ns, h[ks*ldh+ks:], ldh, h[kt*ldh:], ldh)
379 if ns > nmin && recur > 0 {
380 ks += impl.Dlaqr04(false, false, ns, 1, ns-1, h[kt*ldh:], ldh,
381 wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 0, work, lwork, recur-1)
383 ks += impl.Dlahqr(false, false, ns, 0, ns-1, h[kt*ldh:], ldh,
384 wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 0)
386 // In case of a rare QR failure use eigenvalues
387 // of the trailing 2×2 principal submatrix.
389 aa := h[(kbot-1)*ldh+kbot-1]
390 bb := h[(kbot-1)*ldh+kbot]
391 cc := h[kbot*ldh+kbot-1]
392 dd := h[kbot*ldh+kbot]
393 _, _, _, _, wr[kbot-1], wi[kbot-1], wr[kbot], wi[kbot], _, _ =
394 impl.Dlanv2(aa, bb, cc, dd)
400 // Sorting the shifts helps a little. Bubble
401 // sort keeps complex conjugate pairs together.
403 for k := kbot; k > ks; k-- {
408 for i := ks; i < k; i++ {
409 if math.Abs(wr[i])+math.Abs(wi[i]) >= math.Abs(wr[i+1])+math.Abs(wi[i+1]) {
413 wr[i], wr[i+1] = wr[i+1], wr[i]
414 wi[i], wi[i+1] = wi[i+1], wi[i]
419 // Shuffle shifts into pairs of real shifts and pairs of
420 // complex conjugate shifts using the fact that complex
421 // conjugate shifts are already adjacent to one another.
422 // TODO(vladimir-ch): The shuffling here could probably
423 // be removed but I'm not sure right now and it's safer
425 for i := kbot; i > ks+1; i -= 2 {
426 if wi[i] == -wi[i-1] {
429 wr[i], wr[i-1], wr[i-2] = wr[i-1], wr[i-2], wr[i]
430 wi[i], wi[i-1], wi[i-2] = wi[i-1], wi[i-2], wi[i]
434 // If there are only two shifts and both are real, then use only one.
435 if kbot-ks+1 == 2 && wi[kbot] == 0 {
436 if math.Abs(wr[kbot]-h[kbot*ldh+kbot]) < math.Abs(wr[kbot-1]-h[kbot*ldh+kbot]) {
437 wr[kbot-1] = wr[kbot]
439 wr[kbot] = wr[kbot-1]
443 // Use up to ns of the the smallest magnitude shifts. If there
444 // aren't ns shifts available, then use them all, possibly
445 // dropping one to make the number of shifts even.
446 ns = min(ns, kbot-ks+1) &^ 1
449 // Split workspace under the subdiagonal into:
450 // - a kdu×kdu work array U in the lower left-hand-corner,
451 // - a kdu×nhv horizontal work array WH along the bottom edge
452 // (nhv must be at least kdu but more is better),
453 // - an nhv×kdu vertical work array WV along the left-hand-edge
454 // (nhv must be at least kdu but more is better).
460 // Small-bulge multi-shift QR sweep.
461 impl.Dlaqr5(wantt, wantz, kacc22, n, ktop, kbot, ns,
462 wr[ks:ks+ns], wi[ks:ks+ns], h, ldh, iloz, ihiz, z, ldz,
463 work, 3, h[ku*ldh:], ldh, nhv, h[kwv*ldh:], ldh, nhv, h[ku*ldh+kwh:], ldh)
465 // Note progress (or the lack of it).
473 work[0] = float64(lwkopt)