vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr23.go

   1 // Copyright ©2016 The Gonum Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package gonum
   6
   7 import (
   8         "math"
   9
  10         "gonum.org/v1/gonum/blas"
  11         "gonum.org/v1/gonum/blas/blas64"
  12         "gonum.org/v1/gonum/lapack"
  13 )
  14
  15 // Dlaqr23 performs the orthogonal similarity transformation of an n×n upper
  16 // Hessenberg matrix to detect and deflate fully converged eigenvalues from a
  17 // trailing principal submatrix using aggressive early deflation [1].
  18 //
  19 // On return, H will be overwritten by a new Hessenberg matrix that is a
  20 // perturbation of an orthogonal similarity transformation of H. It is hoped
  21 // that on output H will have many zero subdiagonal entries.
  22 //
  23 // If wantt is true, the matrix H will be fully updated so that the
  24 // quasi-triangular Schur factor can be computed. If wantt is false, then only
  25 // enough of H will be updated to preserve the eigenvalues.
  26 //
  27 // If wantz is true, the orthogonal similarity transformation will be
  28 // accumulated into Z[iloz:ihiz+1,ktop:kbot+1], otherwise Z is not referenced.
  29 //
  30 // ktop and kbot determine a block [ktop:kbot+1,ktop:kbot+1] along the diagonal
  31 // of H. It must hold that
  32 //  0 <= ilo <= ihi < n,     if n > 0,
  33 //  ilo == 0 and ihi == -1,  if n == 0,
  34 // and the block must be isolated, that is, it must hold that
  35 //  ktop == 0   or H[ktop,ktop-1] == 0,
  36 //  kbot == n-1 or H[kbot+1,kbot] == 0,
  37 // otherwise Dlaqr23 will panic.
  38 //
  39 // nw is the deflation window size. It must hold that
  40 //  0 <= nw <= kbot-ktop+1,
  41 // otherwise Dlaqr23 will panic.
  42 //
  43 // iloz and ihiz specify the rows of the n×n matrix Z to which transformations
  44 // will be applied if wantz is true. It must hold that
  45 //  0 <= iloz <= ktop,  and  kbot <= ihiz < n,
  46 // otherwise Dlaqr23 will panic.
  47 //
  48 // sr and si must have length kbot+1, otherwise Dlaqr23 will panic.
  49 //
  50 // v and ldv represent an nw×nw work matrix.
  51 // t and ldt represent an nw×nh work matrix, and nh must be at least nw.
  52 // wv and ldwv represent an nv×nw work matrix.
  53 //
  54 // work must have length at least lwork and lwork must be at least max(1,2*nw),
  55 // otherwise Dlaqr23 will panic. Larger values of lwork may result in greater
  56 // efficiency. On return, work[0] will contain the optimal value of lwork.
  57 //
  58 // If lwork is -1, instead of performing Dlaqr23, the function only estimates the
  59 // optimal workspace size and stores it into work[0]. Neither h nor z are
  60 // accessed.
  61 //
  62 // recur is the non-negative recursion depth. For recur > 0, Dlaqr23 behaves
  63 // as DLAQR3, for recur == 0 it behaves as DLAQR2.
  64 //
  65 // On return, ns and nd will contain respectively the number of unconverged
  66 // (i.e., approximate) eigenvalues and converged eigenvalues that are stored in
  67 // sr and si.
  68 //
  69 // On return, the real and imaginary parts of approximate eigenvalues that may
  70 // be used for shifts will be stored respectively in sr[kbot-nd-ns+1:kbot-nd+1]
  71 // and si[kbot-nd-ns+1:kbot-nd+1].
  72 //
  73 // On return, the real and imaginary parts of converged eigenvalues will be
  74 // stored respectively in sr[kbot-nd+1:kbot+1] and si[kbot-nd+1:kbot+1].
  75 //
  76 // References:
  77 //  [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II:
  78 //      Aggressive Early Deflation. SIAM J. Matrix Anal. Appl 23(4) (2002), pp. 948—973
  79 //      URL: http://dx.doi.org/10.1137/S0895479801384585
  80 //
  81 func (impl Implementation) Dlaqr23(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int, recur int) (ns, nd int) {
  82         switch {
  83         case ktop < 0 || max(0, n-1) < ktop:
  84                 panic("lapack: invalid value of ktop")
  85         case kbot < min(ktop, n-1) || n <= kbot:
  86                 panic("lapack: invalid value of kbot")
  87         case (nw < 0 || kbot-ktop+1 < nw) && lwork != -1:
  88                 panic("lapack: invalid value of nw")
  89         case nh < nw:
  90                 panic("lapack: invalid value of nh")
  91         case lwork < max(1, 2*nw) && lwork != -1:
  92                 panic(badWork)
  93         case len(work) < lwork:
  94                 panic(shortWork)
  95         case recur < 0:
  96                 panic("lapack: recur is negative")
  97         }
  98         if wantz {
  99                 switch {
 100                 case iloz < 0 || ktop < iloz:
 101                         panic("lapack: invalid value of iloz")
 102                 case ihiz < kbot || n <= ihiz:
 103                         panic("lapack: invalid value of ihiz")
 104                 }
 105         }
 106         if lwork != -1 {
 107                 // Check input slices only if not doing workspace query.
 108                 checkMatrix(n, n, h, ldh)
 109                 checkMatrix(nw, nw, v, ldv)
 110                 checkMatrix(nw, nh, t, ldt)
 111                 checkMatrix(nv, nw, wv, ldwv)
 112                 if wantz {
 113                         checkMatrix(n, n, z, ldz)
 114                 }
 115                 switch {
 116                 case ktop > 0 && h[ktop*ldh+ktop-1] != 0:
 117                         panic("lapack: block not isolated")
 118                 case kbot+1 < n && h[(kbot+1)*ldh+kbot] != 0:
 119                         panic("lapack: block not isolated")
 120                 case len(sr) != kbot+1:
 121                         panic("lapack: bad length of sr")
 122                 case len(si) != kbot+1:
 123                         panic("lapack: bad length of si")
 124                 }
 125         }
 126
 127         // Quick return for zero window size.
 128         if nw == 0 {
 129                 work[0] = 1
 130                 return 0, 0
 131         }
 132
 133         // LAPACK code does not enforce the documented behavior
 134         //  nw <= kbot-ktop+1
 135         // but we do (we panic above).
 136         jw := nw
 137         lwkopt := max(1, 2*nw)
 138         if jw > 2 {
 139                 // Workspace query call to Dgehrd.
 140                 impl.Dgehrd(jw, 0, jw-2, nil, 0, nil, work, -1)
 141                 lwk1 := int(work[0])
 142                 // Workspace query call to Dormhr.
 143                 impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, nil, 0, nil, nil, 0, work, -1)
 144                 lwk2 := int(work[0])
 145                 if recur > 0 {
 146                         // Workspace query call to Dlaqr04.
 147                         impl.Dlaqr04(true, true, jw, 0, jw-1, nil, 0, nil, nil, 0, jw-1, nil, 0, work, -1, recur-1)
 148                         lwk3 := int(work[0])
 149                         // Optimal workspace.
 150                         lwkopt = max(jw+max(lwk1, lwk2), lwk3)
 151                 } else {
 152                         // Optimal workspace.
 153                         lwkopt = jw + max(lwk1, lwk2)
 154                 }
 155         }
 156         // Quick return in case of workspace query.
 157         if lwork == -1 {
 158                 work[0] = float64(lwkopt)
 159                 return 0, 0
 160         }
 161
 162         // Machine constants.
 163         ulp := dlamchP
 164         smlnum := float64(n) / ulp * dlamchS
 165
 166         // Setup deflation window.
 167         var s float64
 168         kwtop := kbot - jw + 1
 169         if kwtop != ktop {
 170                 s = h[kwtop*ldh+kwtop-1]
 171         }
 172         if kwtop == kbot {
 173                 // 1×1 deflation window.
 174                 sr[kwtop] = h[kwtop*ldh+kwtop]
 175                 si[kwtop] = 0
 176                 ns = 1
 177                 nd = 0
 178                 if math.Abs(s) <= math.Max(smlnum, ulp*math.Abs(h[kwtop*ldh+kwtop])) {
 179                         ns = 0
 180                         nd = 1
 181                         if kwtop > ktop {
 182                                 h[kwtop*ldh+kwtop-1] = 0
 183                         }
 184                 }
 185                 work[0] = 1
 186                 return ns, nd
 187         }
 188
 189         // Convert to spike-triangular form. In case of a rare QR failure, this
 190         // routine continues to do aggressive early deflation using that part of
 191         // the deflation window that converged using infqr here and there to
 192         // keep track.
 193         impl.Dlacpy(blas.Upper, jw, jw, h[kwtop*ldh+kwtop:], ldh, t, ldt)
 194         bi := blas64.Implementation()
 195         bi.Dcopy(jw-1, h[(kwtop+1)*ldh+kwtop:], ldh+1, t[ldt:], ldt+1)
 196         impl.Dlaset(blas.All, jw, jw, 0, 1, v, ldv)
 197         nmin := impl.Ilaenv(12, "DLAQR3", "SV", jw, 0, jw-1, lwork)
 198         var infqr int
 199         if recur > 0 && jw > nmin {
 200                 infqr = impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv, work, lwork, recur-1)
 201         } else {
 202                 infqr = impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv)
 203         }
 204         // Note that ilo == 0 which conveniently coincides with the success
 205         // value of infqr, that is, infqr as an index always points to the first
 206         // converged eigenvalue.
 207
 208         // Dtrexc needs a clean margin near the diagonal.
 209         for j := 0; j < jw-3; j++ {
 210                 t[(j+2)*ldt+j] = 0
 211                 t[(j+3)*ldt+j] = 0
 212         }
 213         if jw >= 3 {
 214                 t[(jw-1)*ldt+jw-3] = 0
 215         }
 216
 217         ns = jw
 218         ilst := infqr
 219         // Deflation detection loop.
 220         for ilst < ns {
 221                 bulge := false
 222                 if ns >= 2 {
 223                         bulge = t[(ns-1)*ldt+ns-2] != 0
 224                 }
 225                 if !bulge {
 226                         // Real eigenvalue.
 227                         abst := math.Abs(t[(ns-1)*ldt+ns-1])
 228                         if abst == 0 {
 229                                 abst = math.Abs(s)
 230                         }
 231                         if math.Abs(s*v[ns-1]) <= math.Max(smlnum, ulp*abst) {
 232                                 // Deflatable.
 233                                 ns--
 234                         } else {
 235                                 // Undeflatable, move it up out of the way.
 236                                 // Dtrexc can not fail in this case.
 237                                 _, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work)
 238                                 ilst++
 239                         }
 240                         continue
 241                 }
 242                 // Complex conjugate pair.
 243                 abst := math.Abs(t[(ns-1)*ldt+ns-1]) + math.Sqrt(math.Abs(t[(ns-1)*ldt+ns-2]))*math.Sqrt(math.Abs(t[(ns-2)*ldt+ns-1]))
 244                 if abst == 0 {
 245                         abst = math.Abs(s)
 246                 }
 247                 if math.Max(math.Abs(s*v[ns-1]), math.Abs(s*v[ns-2])) <= math.Max(smlnum, ulp*abst) {
 248                         // Deflatable.
 249                         ns -= 2
 250                 } else {
 251                         // Undeflatable, move them up out of the way.
 252                         // Dtrexc does the right thing with ilst in case of a
 253                         // rare exchange failure.
 254                         _, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work)
 255                         ilst += 2
 256                 }
 257         }
 258
 259         // Return to Hessenberg form.
 260         if ns == 0 {
 261                 s = 0
 262         }
 263         if ns < jw {
 264                 // Sorting diagonal blocks of T improves accuracy for graded
 265                 // matrices. Bubble sort deals well with exchange failures.
 266                 sorted := false
 267                 i := ns
 268                 for !sorted {
 269                         sorted = true
 270                         kend := i - 1
 271                         i = infqr
 272                         var k int
 273                         if i == ns-1 || t[(i+1)*ldt+i] == 0 {
 274                                 k = i + 1
 275                         } else {
 276                                 k = i + 2
 277                         }
 278                         for k <= kend {
 279                                 var evi float64
 280                                 if k == i+1 {
 281                                         evi = math.Abs(t[i*ldt+i])
 282                                 } else {
 283                                         evi = math.Abs(t[i*ldt+i]) + math.Sqrt(math.Abs(t[(i+1)*ldt+i]))*math.Sqrt(math.Abs(t[i*ldt+i+1]))
 284                                 }
 285
 286                                 var evk float64
 287                                 if k == kend || t[(k+1)*ldt+k] == 0 {
 288                                         evk = math.Abs(t[k*ldt+k])
 289                                 } else {
 290                                         evk = math.Abs(t[k*ldt+k]) + math.Sqrt(math.Abs(t[(k+1)*ldt+k]))*math.Sqrt(math.Abs(t[k*ldt+k+1]))
 291                                 }
 292
 293                                 if evi >= evk {
 294                                         i = k
 295                                 } else {
 296                                         sorted = false
 297                                         _, ilst, ok := impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, i, k, work)
 298                                         if ok {
 299                                                 i = ilst
 300                                         } else {
 301                                                 i = k
 302                                         }
 303                                 }
 304                                 if i == kend || t[(i+1)*ldt+i] == 0 {
 305                                         k = i + 1
 306                                 } else {
 307                                         k = i + 2
 308                                 }
 309                         }
 310                 }
 311         }
 312
 313         // Restore shift/eigenvalue array from T.
 314         for i := jw - 1; i >= infqr; {
 315                 if i == infqr || t[i*ldt+i-1] == 0 {
 316                         sr[kwtop+i] = t[i*ldt+i]
 317                         si[kwtop+i] = 0
 318                         i--
 319                         continue
 320                 }
 321                 aa := t[(i-1)*ldt+i-1]
 322                 bb := t[(i-1)*ldt+i]
 323                 cc := t[i*ldt+i-1]
 324                 dd := t[i*ldt+i]
 325                 _, _, _, _, sr[kwtop+i-1], si[kwtop+i-1], sr[kwtop+i], si[kwtop+i], _, _ = impl.Dlanv2(aa, bb, cc, dd)
 326                 i -= 2
 327         }
 328
 329         if ns < jw || s == 0 {
 330                 if ns > 1 && s != 0 {
 331                         // Reflect spike back into lower triangle.
 332                         bi.Dcopy(ns, v[:ns], 1, work[:ns], 1)
 333                         _, tau := impl.Dlarfg(ns, work[0], work[1:ns], 1)
 334                         work[0] = 1
 335                         impl.Dlaset(blas.Lower, jw-2, jw-2, 0, 0, t[2*ldt:], ldt)
 336                         impl.Dlarf(blas.Left, ns, jw, work[:ns], 1, tau, t, ldt, work[jw:])
 337                         impl.Dlarf(blas.Right, ns, ns, work[:ns], 1, tau, t, ldt, work[jw:])
 338                         impl.Dlarf(blas.Right, jw, ns, work[:ns], 1, tau, v, ldv, work[jw:])
 339                         impl.Dgehrd(jw, 0, ns-1, t, ldt, work[:jw-1], work[jw:], lwork-jw)
 340                 }
 341
 342                 // Copy updated reduced window into place.
 343                 if kwtop > 0 {
 344                         h[kwtop*ldh+kwtop-1] = s * v[0]
 345                 }
 346                 impl.Dlacpy(blas.Upper, jw, jw, t, ldt, h[kwtop*ldh+kwtop:], ldh)
 347                 bi.Dcopy(jw-1, t[ldt:], ldt+1, h[(kwtop+1)*ldh+kwtop:], ldh+1)
 348
 349                 // Accumulate orthogonal matrix in order to update H and Z, if
 350                 // requested.
 351                 if ns > 1 && s != 0 {
 352                         // work[:ns-1] contains the elementary reflectors stored
 353                         // by a call to Dgehrd above.
 354                         impl.Dormhr(blas.Right, blas.NoTrans, jw, ns, 0, ns-1,
 355                                 t, ldt, work[:ns-1], v, ldv, work[jw:], lwork-jw)
 356                 }
 357
 358                 // Update vertical slab in H.
 359                 var ltop int
 360                 if !wantt {
 361                         ltop = ktop
 362                 }
 363                 for krow := ltop; krow < kwtop; krow += nv {
 364                         kln := min(nv, kwtop-krow)
 365                         bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
 366                                 1, h[krow*ldh+kwtop:], ldh, v, ldv,
 367                                 0, wv, ldwv)
 368                         impl.Dlacpy(blas.All, kln, jw, wv, ldwv, h[krow*ldh+kwtop:], ldh)
 369                 }
 370
 371                 // Update horizontal slab in H.
 372                 if wantt {
 373                         for kcol := kbot + 1; kcol < n; kcol += nh {
 374                                 kln := min(nh, n-kcol)
 375                                 bi.Dgemm(blas.Trans, blas.NoTrans, jw, kln, jw,
 376                                         1, v, ldv, h[kwtop*ldh+kcol:], ldh,
 377                                         0, t, ldt)
 378                                 impl.Dlacpy(blas.All, jw, kln, t, ldt, h[kwtop*ldh+kcol:], ldh)
 379                         }
 380                 }
 381
 382                 // Update vertical slab in Z.
 383                 if wantz {
 384                         for krow := iloz; krow <= ihiz; krow += nv {
 385                                 kln := min(nv, ihiz-krow+1)
 386                                 bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
 387                                         1, z[krow*ldz+kwtop:], ldz, v, ldv,
 388                                         0, wv, ldwv)
 389                                 impl.Dlacpy(blas.All, kln, jw, wv, ldwv, z[krow*ldz+kwtop:], ldz)
 390                         }
 391                 }
 392         }
 393
 394         // The number of deflations.
 395         nd = jw - ns
 396         // Shifts are converged eigenvalues that could not be deflated.
 397         // Subtracting infqr from the spike length takes care of the case of a
 398         // rare QR failure while calculating eigenvalues of the deflation
 399         // window.
 400         ns -= infqr
 401         work[0] = float64(lwkopt)
 402         return ns, nd
 403 }