1 // Copyright ©2016 The Gonum Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
10 "gonum.org/v1/gonum/blas"
11 "gonum.org/v1/gonum/blas/blas64"
12 "gonum.org/v1/gonum/lapack"
15 // Dlaqr23 performs the orthogonal similarity transformation of an n×n upper
16 // Hessenberg matrix to detect and deflate fully converged eigenvalues from a
17 // trailing principal submatrix using aggressive early deflation [1].
19 // On return, H will be overwritten by a new Hessenberg matrix that is a
20 // perturbation of an orthogonal similarity transformation of H. It is hoped
21 // that on output H will have many zero subdiagonal entries.
23 // If wantt is true, the matrix H will be fully updated so that the
24 // quasi-triangular Schur factor can be computed. If wantt is false, then only
25 // enough of H will be updated to preserve the eigenvalues.
27 // If wantz is true, the orthogonal similarity transformation will be
28 // accumulated into Z[iloz:ihiz+1,ktop:kbot+1], otherwise Z is not referenced.
30 // ktop and kbot determine a block [ktop:kbot+1,ktop:kbot+1] along the diagonal
31 // of H. It must hold that
32 // 0 <= ktop <= kbot < n, if n > 0,
33 // ktop == 0 and kbot == -1, if n == 0,
34 // and the block must be isolated, that is, it must hold that
35 // ktop == 0 or H[ktop,ktop-1] == 0,
36 // kbot == n-1 or H[kbot+1,kbot] == 0,
37 // otherwise Dlaqr23 will panic.
39 // nw is the deflation window size. It must hold that
40 // 0 <= nw <= kbot-ktop+1,
41 // otherwise Dlaqr23 will panic.
43 // iloz and ihiz specify the rows of the n×n matrix Z to which transformations
44 // will be applied if wantz is true. It must hold that
45 // 0 <= iloz <= ktop, and kbot <= ihiz < n,
46 // otherwise Dlaqr23 will panic.
48 // sr and si must have length kbot+1, otherwise Dlaqr23 will panic.
50 // v and ldv represent an nw×nw work matrix.
51 // t and ldt represent an nw×nh work matrix, and nh must be at least nw.
52 // wv and ldwv represent an nv×nw work matrix.
54 // work must have length at least lwork and lwork must be at least max(1,2*nw),
55 // otherwise Dlaqr23 will panic. Larger values of lwork may result in greater
56 // efficiency. On return, work[0] will contain the optimal value of lwork.
58 // If lwork is -1, instead of performing Dlaqr23, the function only estimates the
59 // optimal workspace size and stores it into work[0]. Neither h nor z are
62 // recur is the non-negative recursion depth. For recur > 0, Dlaqr23 behaves
63 // as DLAQR3, for recur == 0 it behaves as DLAQR2.
65 // On return, ns and nd will contain respectively the number of unconverged
66 // (i.e., approximate) eigenvalues and converged eigenvalues that are stored in
69 // On return, the real and imaginary parts of approximate eigenvalues that may
70 // be used for shifts will be stored respectively in sr[kbot-nd-ns+1:kbot-nd+1]
71 // and si[kbot-nd-ns+1:kbot-nd+1].
73 // On return, the real and imaginary parts of converged eigenvalues will be
74 // stored respectively in sr[kbot-nd+1:kbot+1] and si[kbot-nd+1:kbot+1].
77 // [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II:
78 // Aggressive Early Deflation. SIAM J. Matrix Anal. Appl. 23(4) (2002), pp. 948–973
79 // URL: https://doi.org/10.1137/S0895479801384585
81 func (impl Implementation) Dlaqr23(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int, recur int) (ns, nd int) {
83 case ktop < 0 || max(0, n-1) < ktop:
84 panic("lapack: invalid value of ktop")
85 case kbot < min(ktop, n-1) || n <= kbot:
86 panic("lapack: invalid value of kbot")
87 case (nw < 0 || kbot-ktop+1 < nw) && lwork != -1:
88 panic("lapack: invalid value of nw")
90 panic("lapack: invalid value of nh")
91 case lwork < max(1, 2*nw) && lwork != -1:
93 case len(work) < lwork:
96 panic("lapack: recur is negative")
100 case iloz < 0 || ktop < iloz:
101 panic("lapack: invalid value of iloz")
102 case ihiz < kbot || n <= ihiz:
103 panic("lapack: invalid value of ihiz")
107 // Check input slices only if not doing workspace query.
108 checkMatrix(n, n, h, ldh)
109 checkMatrix(nw, nw, v, ldv)
110 checkMatrix(nw, nh, t, ldt)
111 checkMatrix(nv, nw, wv, ldwv)
113 checkMatrix(n, n, z, ldz)
116 case ktop > 0 && h[ktop*ldh+ktop-1] != 0:
117 panic("lapack: block not isolated")
118 case kbot+1 < n && h[(kbot+1)*ldh+kbot] != 0:
119 panic("lapack: block not isolated")
120 case len(sr) != kbot+1:
121 panic("lapack: bad length of sr")
122 case len(si) != kbot+1:
123 panic("lapack: bad length of si")
127 // Quick return for zero window size.
133 // LAPACK code does not enforce the documented behavior
135 // but we do (we panic above).
137 lwkopt := max(1, 2*nw)
139 // Workspace query call to Dgehrd.
140 impl.Dgehrd(jw, 0, jw-2, nil, 0, nil, work, -1)
142 // Workspace query call to Dormhr.
143 impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, nil, 0, nil, nil, 0, work, -1)
146 // Workspace query call to Dlaqr04.
147 impl.Dlaqr04(true, true, jw, 0, jw-1, nil, 0, nil, nil, 0, jw-1, nil, 0, work, -1, recur-1)
149 // Optimal workspace.
150 lwkopt = max(jw+max(lwk1, lwk2), lwk3)
152 // Optimal workspace.
153 lwkopt = jw + max(lwk1, lwk2)
156 // Quick return in case of workspace query.
158 work[0] = float64(lwkopt)
162 // Machine constants.
164 smlnum := float64(n) / ulp * dlamchS
166 // Setup deflation window.
168 kwtop := kbot - jw + 1
170 s = h[kwtop*ldh+kwtop-1]
173 // 1×1 deflation window.
174 sr[kwtop] = h[kwtop*ldh+kwtop]
178 if math.Abs(s) <= math.Max(smlnum, ulp*math.Abs(h[kwtop*ldh+kwtop])) {
182 h[kwtop*ldh+kwtop-1] = 0
189 // Convert to spike-triangular form. In case of a rare QR failure, this
190 // routine continues to do aggressive early deflation using that part of
191 // the deflation window that converged using infqr here and there to
193 impl.Dlacpy(blas.Upper, jw, jw, h[kwtop*ldh+kwtop:], ldh, t, ldt)
194 bi := blas64.Implementation()
195 bi.Dcopy(jw-1, h[(kwtop+1)*ldh+kwtop:], ldh+1, t[ldt:], ldt+1)
196 impl.Dlaset(blas.All, jw, jw, 0, 1, v, ldv)
197 nmin := impl.Ilaenv(12, "DLAQR3", "SV", jw, 0, jw-1, lwork)
199 if recur > 0 && jw > nmin {
200 infqr = impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv, work, lwork, recur-1)
202 infqr = impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv)
204 // Note that ilo == 0 which conveniently coincides with the success
205 // value of infqr, that is, infqr as an index always points to the first
206 // converged eigenvalue.
208 // Dtrexc needs a clean margin near the diagonal.
209 for j := 0; j < jw-3; j++ {
214 t[(jw-1)*ldt+jw-3] = 0
219 // Deflation detection loop.
223 bulge = t[(ns-1)*ldt+ns-2] != 0
227 abst := math.Abs(t[(ns-1)*ldt+ns-1])
231 if math.Abs(s*v[ns-1]) <= math.Max(smlnum, ulp*abst) {
235 // Undeflatable, move it up out of the way.
236 // Dtrexc can not fail in this case.
237 _, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work)
242 // Complex conjugate pair.
243 abst := math.Abs(t[(ns-1)*ldt+ns-1]) + math.Sqrt(math.Abs(t[(ns-1)*ldt+ns-2]))*math.Sqrt(math.Abs(t[(ns-2)*ldt+ns-1]))
247 if math.Max(math.Abs(s*v[ns-1]), math.Abs(s*v[ns-2])) <= math.Max(smlnum, ulp*abst) {
251 // Undeflatable, move them up out of the way.
252 // Dtrexc does the right thing with ilst in case of a
253 // rare exchange failure.
254 _, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work)
259 // Return to Hessenberg form.
264 // Sorting diagonal blocks of T improves accuracy for graded
265 // matrices. Bubble sort deals well with exchange failures.
273 if i == ns-1 || t[(i+1)*ldt+i] == 0 {
281 evi = math.Abs(t[i*ldt+i])
283 evi = math.Abs(t[i*ldt+i]) + math.Sqrt(math.Abs(t[(i+1)*ldt+i]))*math.Sqrt(math.Abs(t[i*ldt+i+1]))
287 if k == kend || t[(k+1)*ldt+k] == 0 {
288 evk = math.Abs(t[k*ldt+k])
290 evk = math.Abs(t[k*ldt+k]) + math.Sqrt(math.Abs(t[(k+1)*ldt+k]))*math.Sqrt(math.Abs(t[k*ldt+k+1]))
297 _, ilst, ok := impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, i, k, work)
304 if i == kend || t[(i+1)*ldt+i] == 0 {
313 // Restore shift/eigenvalue array from T.
314 for i := jw - 1; i >= infqr; {
315 if i == infqr || t[i*ldt+i-1] == 0 {
316 sr[kwtop+i] = t[i*ldt+i]
321 aa := t[(i-1)*ldt+i-1]
325 _, _, _, _, sr[kwtop+i-1], si[kwtop+i-1], sr[kwtop+i], si[kwtop+i], _, _ = impl.Dlanv2(aa, bb, cc, dd)
329 if ns < jw || s == 0 {
330 if ns > 1 && s != 0 {
331 // Reflect spike back into lower triangle.
332 bi.Dcopy(ns, v[:ns], 1, work[:ns], 1)
333 _, tau := impl.Dlarfg(ns, work[0], work[1:ns], 1)
335 impl.Dlaset(blas.Lower, jw-2, jw-2, 0, 0, t[2*ldt:], ldt)
336 impl.Dlarf(blas.Left, ns, jw, work[:ns], 1, tau, t, ldt, work[jw:])
337 impl.Dlarf(blas.Right, ns, ns, work[:ns], 1, tau, t, ldt, work[jw:])
338 impl.Dlarf(blas.Right, jw, ns, work[:ns], 1, tau, v, ldv, work[jw:])
339 impl.Dgehrd(jw, 0, ns-1, t, ldt, work[:jw-1], work[jw:], lwork-jw)
342 // Copy updated reduced window into place.
344 h[kwtop*ldh+kwtop-1] = s * v[0]
346 impl.Dlacpy(blas.Upper, jw, jw, t, ldt, h[kwtop*ldh+kwtop:], ldh)
347 bi.Dcopy(jw-1, t[ldt:], ldt+1, h[(kwtop+1)*ldh+kwtop:], ldh+1)
349 // Accumulate orthogonal matrix in order to update H and Z, if
351 if ns > 1 && s != 0 {
352 // work[:ns-1] contains the elementary reflectors stored
353 // by a call to Dgehrd above.
354 impl.Dormhr(blas.Right, blas.NoTrans, jw, ns, 0, ns-1,
355 t, ldt, work[:ns-1], v, ldv, work[jw:], lwork-jw)
358 // Update vertical slab in H.
363 for krow := ltop; krow < kwtop; krow += nv {
364 kln := min(nv, kwtop-krow)
365 bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
366 1, h[krow*ldh+kwtop:], ldh, v, ldv,
368 impl.Dlacpy(blas.All, kln, jw, wv, ldwv, h[krow*ldh+kwtop:], ldh)
371 // Update horizontal slab in H.
373 for kcol := kbot + 1; kcol < n; kcol += nh {
374 kln := min(nh, n-kcol)
375 bi.Dgemm(blas.Trans, blas.NoTrans, jw, kln, jw,
376 1, v, ldv, h[kwtop*ldh+kcol:], ldh,
378 impl.Dlacpy(blas.All, jw, kln, t, ldt, h[kwtop*ldh+kcol:], ldh)
382 // Update vertical slab in Z.
384 for krow := iloz; krow <= ihiz; krow += nv {
385 kln := min(nv, ihiz-krow+1)
386 bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
387 1, z[krow*ldz+kwtop:], ldz, v, ldv,
389 impl.Dlacpy(blas.All, kln, jw, wv, ldwv, z[krow*ldz+kwtop:], ldz)
394 // The number of deflations.
396 // Shifts are converged eigenvalues that could not be deflated.
397 // Subtracting infqr from the spike length takes care of the case of a
398 // rare QR failure while calculating eigenvalues of the deflation
401 work[0] = float64(lwkopt)