vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr04.go

   1 // Copyright ©2016 The Gonum Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package gonum
   6
   7 import (
   8         "math"
   9
  10         "gonum.org/v1/gonum/blas"
  11 )
  12
  13 // Dlaqr04 computes the eigenvalues of a block of an n×n upper Hessenberg matrix
  14 // H, and optionally the matrices T and Z from the Schur decomposition
  15 //  H = Z T Z^T
  16 // where T is an upper quasi-triangular matrix (the Schur form), and Z is the
  17 // orthogonal matrix of Schur vectors.
  18 //
  19 // wantt indicates whether the full Schur form T is required. If wantt is false,
  20 // then only enough of H will be updated to preserve the eigenvalues.
  21 //
  22 // wantz indicates whether the n×n matrix of Schur vectors Z is required. If it
  23 // is true, the orthogonal similarity transformation will be accumulated into
  24 // Z[iloz:ihiz+1,ilo:ihi+1], otherwise Z will not be referenced.
  25 //
  26 // ilo and ihi determine the block of H on which Dlaqr04 operates. It must hold that
  27 //  0 <= ilo <= ihi < n,     if n > 0,
  28 //  ilo == 0 and ihi == -1,  if n == 0,
  29 // and the block must be isolated, that is,
  30 //  ilo == 0   or H[ilo,ilo-1] == 0,
  31 //  ihi == n-1 or H[ihi+1,ihi] == 0,
  32 // otherwise Dlaqr04 will panic.
  33 //
  34 // wr and wi must have length ihi+1.
  35 //
  36 // iloz and ihiz specify the rows of Z to which transformations will be applied
  37 // if wantz is true. It must hold that
  38 //  0 <= iloz <= ilo,  and  ihi <= ihiz < n,
  39 // otherwise Dlaqr04 will panic.
  40 //
  41 // work must have length at least lwork and lwork must be
  42 //  lwork >= 1,  if n <= 11,
  43 //  lwork >= n,  if n > 11,
  44 // otherwise Dlaqr04 will panic. lwork as large as 6*n may be required for
  45 // optimal performance. On return, work[0] will contain the optimal value of
  46 // lwork.
  47 //
  48 // If lwork is -1, instead of performing Dlaqr04, the function only estimates the
  49 // optimal workspace size and stores it into work[0]. Neither h nor z are
  50 // accessed.
  51 //
  52 // recur is the non-negative recursion depth. For recur > 0, Dlaqr04 behaves
  53 // as DLAQR0, for recur == 0 it behaves as DLAQR4.
  54 //
  55 // unconverged indicates whether Dlaqr04 computed all the eigenvalues of H[ilo:ihi+1,ilo:ihi+1].
  56 //
  57 // If unconverged is zero and wantt is true, H will contain on return the upper
  58 // quasi-triangular matrix T from the Schur decomposition. 2×2 diagonal blocks
  59 // (corresponding to complex conjugate pairs of eigenvalues) will be returned in
  60 // standard form, with H[i,i] == H[i+1,i+1] and H[i+1,i]*H[i,i+1] < 0.
  61 //
  62 // If unconverged is zero and if wantt is false, the contents of h on return is
  63 // unspecified.
  64 //
  65 // If unconverged is zero, all the eigenvalues have been computed and their real
  66 // and imaginary parts will be stored on return in wr[ilo:ihi+1] and
  67 // wi[ilo:ihi+1], respectively. If two eigenvalues are computed as a complex
  68 // conjugate pair, they are stored in consecutive elements of wr and wi, say the
  69 // i-th and (i+1)th, with wi[i] > 0 and wi[i+1] < 0. If wantt is true, then the
  70 // eigenvalues are stored in the same order as on the diagonal of the Schur form
  71 // returned in H, with wr[i] = H[i,i] and, if H[i:i+2,i:i+2] is a 2×2 diagonal
  72 // block, wi[i] = sqrt(-H[i+1,i]*H[i,i+1]) and wi[i+1] = -wi[i].
  73 //
  74 // If unconverged is positive, some eigenvalues have not converged, and
  75 // wr[unconverged:ihi+1] and wi[unconverged:ihi+1] will contain those
  76 // eigenvalues which have been successfully computed. Failures are rare.
  77 //
  78 // If unconverged is positive and wantt is true, then on return
  79 //  (initial H)*U = U*(final H),   (*)
  80 // where U is an orthogonal matrix. The final H is upper Hessenberg and
  81 // H[unconverged:ihi+1,unconverged:ihi+1] is upper quasi-triangular.
  82 //
  83 // If unconverged is positive and wantt is false, on return the remaining
  84 // unconverged eigenvalues are the eigenvalues of the upper Hessenberg matrix
  85 // H[ilo:unconverged,ilo:unconverged].
  86 //
  87 // If unconverged is positive and wantz is true, then on return
  88 //  (final Z) = (initial Z)*U,
  89 // where U is the orthogonal matrix in (*) regardless of the value of wantt.
  90 //
  91 // References:
  92 //  [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part I:
  93 //      Maintaining Well-Focused Shifts and Level 3 Performance. SIAM J. Matrix
  94 //      Anal. Appl. 23(4) (2002), pp. 929—947
  95 //      URL: http://dx.doi.org/10.1137/S0895479801384573
  96 //  [2] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II:
  97 //      Aggressive Early Deflation. SIAM J. Matrix Anal. Appl. 23(4) (2002), pp. 948—973
  98 //      URL: http://dx.doi.org/10.1137/S0895479801384585
  99 //
 100 // Dlaqr04 is an internal routine. It is exported for testing purposes.
 101 func (impl Implementation) Dlaqr04(wantt, wantz bool, n, ilo, ihi int, h []float64, ldh int, wr, wi []float64, iloz, ihiz int, z []float64, ldz int, work []float64, lwork int, recur int) (unconverged int) {
 102         const (
 103                 // Matrices of order ntiny or smaller must be processed by
 104                 // Dlahqr because of insufficient subdiagonal scratch space.
 105                 // This is a hard limit.
 106                 ntiny = 11
 107                 // Exceptional deflation windows: try to cure rare slow
 108                 // convergence by varying the size of the deflation window after
 109                 // kexnw iterations.
 110                 kexnw = 5
 111                 // Exceptional shifts: try to cure rare slow convergence with
 112                 // ad-hoc exceptional shifts every kexsh iterations.
 113                 kexsh = 6
 114
 115                 // See https://github.com/gonum/lapack/pull/151#discussion_r68162802
 116                 // and the surrounding discussion for an explanation where these
 117                 // constants come from.
 118                 // TODO(vladimir-ch): Similar constants for exceptional shifts
 119                 // are used also in dlahqr.go. The first constant is different
 120                 // there, it is equal to 3. Why? And does it matter?
 121                 wilk1 = 0.75
 122                 wilk2 = -0.4375
 123         )
 124
 125         switch {
 126         case ilo < 0 || max(0, n-1) < ilo:
 127                 panic(badIlo)
 128         case ihi < min(ilo, n-1) || n <= ihi:
 129                 panic(badIhi)
 130         case lwork < 1 && n <= ntiny && lwork != -1:
 131                 panic(badWork)
 132         // TODO(vladimir-ch): Enable if and when we figure out what the minimum
 133         // necessary lwork value is. Dlaqr04 says that the minimum is n which
 134         // clashes with Dlaqr23's opinion about optimal work when nw <= 2
 135         // (independent of n).
 136         // case lwork < n && n > ntiny && lwork != -1:
 137         //      panic(badWork)
 138         case len(work) < lwork:
 139                 panic(shortWork)
 140         case recur < 0:
 141                 panic("lapack: recur is negative")
 142         }
 143         if wantz {
 144                 if iloz < 0 || ilo < iloz {
 145                         panic("lapack: invalid value of iloz")
 146                 }
 147                 if ihiz < ihi || n <= ihiz {
 148                         panic("lapack: invalid value of ihiz")
 149                 }
 150         }
 151         if lwork != -1 {
 152                 checkMatrix(n, n, h, ldh)
 153                 if wantz {
 154                         checkMatrix(n, n, z, ldz)
 155                 }
 156                 switch {
 157                 case ilo > 0 && h[ilo*ldh+ilo-1] != 0:
 158                         panic("lapack: block not isolated")
 159                 case ihi+1 < n && h[(ihi+1)*ldh+ihi] != 0:
 160                         panic("lapack: block not isolated")
 161                 case len(wr) != ihi+1:
 162                         panic("lapack: bad length of wr")
 163                 case len(wi) != ihi+1:
 164                         panic("lapack: bad length of wi")
 165                 }
 166         }
 167
 168         // Quick return.
 169         if n == 0 {
 170                 work[0] = 1
 171                 return 0
 172         }
 173
 174         if n <= ntiny {
 175                 // Tiny matrices must use Dlahqr.
 176                 work[0] = 1
 177                 if lwork == -1 {
 178                         return 0
 179                 }
 180                 return impl.Dlahqr(wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, iloz, ihiz, z, ldz)
 181         }
 182
 183         // Use small bulge multi-shift QR with aggressive early deflation on
 184         // larger-than-tiny matrices.
 185         var jbcmpz string
 186         if wantt {
 187                 jbcmpz = "S"
 188         } else {
 189                 jbcmpz = "E"
 190         }
 191         if wantz {
 192                 jbcmpz += "V"
 193         } else {
 194                 jbcmpz += "N"
 195         }
 196
 197         var fname string
 198         if recur > 0 {
 199                 fname = "DLAQR0"
 200         } else {
 201                 fname = "DLAQR4"
 202         }
 203         // nwr is the recommended deflation window size. n is greater than 11,
 204         // so there is enough subdiagonal workspace for nwr >= 2 as required.
 205         // (In fact, there is enough subdiagonal space for nwr >= 3.)
 206         // TODO(vladimir-ch): If there is enough space for nwr >= 3, should we
 207         // use it?
 208         nwr := impl.Ilaenv(13, fname, jbcmpz, n, ilo, ihi, lwork)
 209         nwr = max(2, nwr)
 210         nwr = min(ihi-ilo+1, min((n-1)/3, nwr))
 211
 212         // nsr is the recommended number of simultaneous shifts. n is greater
 213         // than 11, so there is enough subdiagonal workspace for nsr to be even
 214         // and greater than or equal to two as required.
 215         nsr := impl.Ilaenv(15, fname, jbcmpz, n, ilo, ihi, lwork)
 216         nsr = min(nsr, min((n+6)/9, ihi-ilo))
 217         nsr = max(2, nsr&^1)
 218
 219         // Workspace query call to Dlaqr23.
 220         impl.Dlaqr23(wantt, wantz, n, ilo, ihi, nwr+1, nil, 0, iloz, ihiz, nil, 0,
 221                 nil, nil, nil, 0, n, nil, 0, n, nil, 0, work, -1, recur)
 222         // Optimal workspace is max(Dlaqr5, Dlaqr23).
 223         lwkopt := max(3*nsr/2, int(work[0]))
 224         // Quick return in case of workspace query.
 225         if lwork == -1 {
 226                 work[0] = float64(lwkopt)
 227                 return 0
 228         }
 229
 230         // Dlahqr/Dlaqr04 crossover point.
 231         nmin := impl.Ilaenv(12, fname, jbcmpz, n, ilo, ihi, lwork)
 232         nmin = max(ntiny, nmin)
 233
 234         // Nibble determines when to skip a multi-shift QR sweep (Dlaqr5).
 235         nibble := impl.Ilaenv(14, fname, jbcmpz, n, ilo, ihi, lwork)
 236         nibble = max(0, nibble)
 237
 238         // Computation mode of far-from-diagonal orthogonal updates in Dlaqr5.
 239         kacc22 := impl.Ilaenv(16, fname, jbcmpz, n, ilo, ihi, lwork)
 240         kacc22 = max(0, min(kacc22, 2))
 241
 242         // nwmax is the largest possible deflation window for which there is
 243         // sufficient workspace.
 244         nwmax := min((n-1)/3, lwork/2)
 245         nw := nwmax // Start with maximum deflation window size.
 246
 247         // nsmax is the largest number of simultaneous shifts for which there is
 248         // sufficient workspace.
 249         nsmax := min((n+6)/9, 2*lwork/3) &^ 1
 250
 251         ndfl := 1 // Number of iterations since last deflation.
 252         ndec := 0 // Deflation window size decrement.
 253
 254         // Main loop.
 255         var (
 256                 itmax = max(30, 2*kexsh) * max(10, (ihi-ilo+1))
 257                 it    = 0
 258         )
 259         for kbot := ihi; kbot >= ilo; {
 260                 if it == itmax {
 261                         unconverged = kbot + 1
 262                         break
 263                 }
 264                 it++
 265
 266                 // Locate active block.
 267                 ktop := ilo
 268                 for k := kbot; k >= ilo+1; k-- {
 269                         if h[k*ldh+k-1] == 0 {
 270                                 ktop = k
 271                                 break
 272                         }
 273                 }
 274
 275                 // Select deflation window size nw.
 276                 //
 277                 // Typical Case:
 278                 //  If possible and advisable, nibble the entire active block.
 279                 //  If not, use size min(nwr,nwmax) or min(nwr+1,nwmax)
 280                 //  depending upon which has the smaller corresponding
 281                 //  subdiagonal entry (a heuristic).
 282                 //
 283                 // Exceptional Case:
 284                 //  If there have been no deflations in kexnw or more
 285                 //  iterations, then vary the deflation window size. At first,
 286                 //  because larger windows are, in general, more powerful than
 287                 //  smaller ones, rapidly increase the window to the maximum
 288                 //  possible. Then, gradually reduce the window size.
 289                 nh := kbot - ktop + 1
 290                 nwupbd := min(nh, nwmax)
 291                 if ndfl < kexnw {
 292                         nw = min(nwupbd, nwr)
 293                 } else {
 294                         nw = min(nwupbd, 2*nw)
 295                 }
 296                 if nw < nwmax {
 297                         if nw >= nh-1 {
 298                                 nw = nh
 299                         } else {
 300                                 kwtop := kbot - nw + 1
 301                                 if math.Abs(h[kwtop*ldh+kwtop-1]) > math.Abs(h[(kwtop-1)*ldh+kwtop-2]) {
 302                                         nw++
 303                                 }
 304                         }
 305                 }
 306                 if ndfl < kexnw {
 307                         ndec = -1
 308                 } else if ndec >= 0 || nw >= nwupbd {
 309                         ndec++
 310                         if nw-ndec < 2 {
 311                                 ndec = 0
 312                         }
 313                         nw -= ndec
 314                 }
 315
 316                 // Split workspace under the subdiagonal of H into:
 317                 //  - an nw×nw work array V in the lower left-hand corner,
 318                 //  - an nw×nhv horizontal work array along the bottom edge (nhv
 319                 //    must be at least nw but more is better),
 320                 //  - an nve×nw vertical work array along the left-hand-edge
 321                 //    (nhv can be any positive integer but more is better).
 322                 kv := n - nw
 323                 kt := nw
 324                 kwv := nw + 1
 325                 nhv := n - kwv - kt
 326                 // Aggressive early deflation.
 327                 ls, ld := impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw,
 328                         h, ldh, iloz, ihiz, z, ldz, wr[:kbot+1], wi[:kbot+1],
 329                         h[kv*ldh:], ldh, nhv, h[kv*ldh+kt:], ldh, nhv, h[kwv*ldh:], ldh, work, lwork, recur)
 330
 331                 // Adjust kbot accounting for new deflations.
 332                 kbot -= ld
 333                 // ks points to the shifts.
 334                 ks := kbot - ls + 1
 335
 336                 // Skip an expensive QR sweep if there is a (partly heuristic)
 337                 // reason to expect that many eigenvalues will deflate without
 338                 // it. Here, the QR sweep is skipped if many eigenvalues have
 339                 // just been deflated or if the remaining active block is small.
 340                 if ld > 0 && (100*ld > nw*nibble || kbot-ktop+1 <= min(nmin, nwmax)) {
 341                         // ld is positive, note progress.
 342                         ndfl = 1
 343                         continue
 344                 }
 345
 346                 // ns is the nominal number of simultaneous shifts. This may be
 347                 // lowered (slightly) if Dlaqr23 did not provide that many
 348                 // shifts.
 349                 ns := min(min(nsmax, nsr), max(2, kbot-ktop)) &^ 1
 350
 351                 // If there have been no deflations in a multiple of kexsh
 352                 // iterations, then try exceptional shifts. Otherwise use shifts
 353                 // provided by Dlaqr23 above or from the eigenvalues of a
 354                 // trailing principal submatrix.
 355                 if ndfl%kexsh == 0 {
 356                         ks = kbot - ns + 1
 357                         for i := kbot; i > max(ks, ktop+1); i -= 2 {
 358                                 ss := math.Abs(h[i*ldh+i-1]) + math.Abs(h[(i-1)*ldh+i-2])
 359                                 aa := wilk1*ss + h[i*ldh+i]
 360                                 _, _, _, _, wr[i-1], wi[i-1], wr[i], wi[i], _, _ =
 361                                         impl.Dlanv2(aa, ss, wilk2*ss, aa)
 362                         }
 363                         if ks == ktop {
 364                                 wr[ks+1] = h[(ks+1)*ldh+ks+1]
 365                                 wi[ks+1] = 0
 366                                 wr[ks] = wr[ks+1]
 367                                 wi[ks] = wi[ks+1]
 368                         }
 369                 } else {
 370                         // If we got ns/2 or fewer shifts, use Dlahqr or recur
 371                         // into Dlaqr04 on a trailing principal submatrix to get
 372                         // more. Since ns <= nsmax <=(n+6)/9, there is enough
 373                         // space below the subdiagonal to fit an ns×ns scratch
 374                         // array.
 375                         if kbot-ks+1 <= ns/2 {
 376                                 ks = kbot - ns + 1
 377                                 kt = n - ns
 378                                 impl.Dlacpy(blas.All, ns, ns, h[ks*ldh+ks:], ldh, h[kt*ldh:], ldh)
 379                                 if ns > nmin && recur > 0 {
 380                                         ks += impl.Dlaqr04(false, false, ns, 1, ns-1, h[kt*ldh:], ldh,
 381                                                 wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 0, work, lwork, recur-1)
 382                                 } else {
 383                                         ks += impl.Dlahqr(false, false, ns, 0, ns-1, h[kt*ldh:], ldh,
 384                                                 wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 0)
 385                                 }
 386                                 // In case of a rare QR failure use eigenvalues
 387                                 // of the trailing 2×2 principal submatrix.
 388                                 if ks >= kbot {
 389                                         aa := h[(kbot-1)*ldh+kbot-1]
 390                                         bb := h[(kbot-1)*ldh+kbot]
 391                                         cc := h[kbot*ldh+kbot-1]
 392                                         dd := h[kbot*ldh+kbot]
 393                                         _, _, _, _, wr[kbot-1], wi[kbot-1], wr[kbot], wi[kbot], _, _ =
 394                                                 impl.Dlanv2(aa, bb, cc, dd)
 395                                         ks = kbot - 1
 396                                 }
 397                         }
 398
 399                         if kbot-ks+1 > ns {
 400                                 // Sorting the shifts helps a little. Bubble
 401                                 // sort keeps complex conjugate pairs together.
 402                                 sorted := false
 403                                 for k := kbot; k > ks; k-- {
 404                                         if sorted {
 405                                                 break
 406                                         }
 407                                         sorted = true
 408                                         for i := ks; i < k; i++ {
 409                                                 if math.Abs(wr[i])+math.Abs(wi[i]) >= math.Abs(wr[i+1])+math.Abs(wi[i+1]) {
 410                                                         continue
 411                                                 }
 412                                                 sorted = false
 413                                                 wr[i], wr[i+1] = wr[i+1], wr[i]
 414                                                 wi[i], wi[i+1] = wi[i+1], wi[i]
 415                                         }
 416                                 }
 417                         }
 418
 419                         // Shuffle shifts into pairs of real shifts and pairs of
 420                         // complex conjugate shifts using the fact that complex
 421                         // conjugate shifts are already adjacent to one another.
 422                         // TODO(vladimir-ch): The shuffling here could probably
 423                         // be removed but I'm not sure right now and it's safer
 424                         // to leave it.
 425                         for i := kbot; i > ks+1; i -= 2 {
 426                                 if wi[i] == -wi[i-1] {
 427                                         continue
 428                                 }
 429                                 wr[i], wr[i-1], wr[i-2] = wr[i-1], wr[i-2], wr[i]
 430                                 wi[i], wi[i-1], wi[i-2] = wi[i-1], wi[i-2], wi[i]
 431                         }
 432                 }
 433
 434                 // If there are only two shifts and both are real, then use only one.
 435                 if kbot-ks+1 == 2 && wi[kbot] == 0 {
 436                         if math.Abs(wr[kbot]-h[kbot*ldh+kbot]) < math.Abs(wr[kbot-1]-h[kbot*ldh+kbot]) {
 437                                 wr[kbot-1] = wr[kbot]
 438                         } else {
 439                                 wr[kbot] = wr[kbot-1]
 440                         }
 441                 }
 442
 443                 // Use up to ns of the the smallest magnitude shifts. If there
 444                 // aren't ns shifts available, then use them all, possibly
 445                 // dropping one to make the number of shifts even.
 446                 ns = min(ns, kbot-ks+1) &^ 1
 447                 ks = kbot - ns + 1
 448
 449                 // Split workspace under the subdiagonal into:
 450                 // - a kdu×kdu work array U in the lower left-hand-corner,
 451                 // - a kdu×nhv horizontal work array WH along the bottom edge
 452                 //   (nhv must be at least kdu but more is better),
 453                 // - an nhv×kdu vertical work array WV along the left-hand-edge
 454                 //   (nhv must be at least kdu but more is better).
 455                 kdu := 3*ns - 3
 456                 ku := n - kdu
 457                 kwh := kdu
 458                 kwv = kdu + 3
 459                 nhv = n - kwv - kdu
 460                 // Small-bulge multi-shift QR sweep.
 461                 impl.Dlaqr5(wantt, wantz, kacc22, n, ktop, kbot, ns,
 462                         wr[ks:ks+ns], wi[ks:ks+ns], h, ldh, iloz, ihiz, z, ldz,
 463                         work, 3, h[ku*ldh:], ldh, nhv, h[kwv*ldh:], ldh, nhv, h[ku*ldh+kwh:], ldh)
 464
 465                 // Note progress (or the lack of it).
 466                 if ld > 0 {
 467                         ndfl = 1
 468                 } else {
 469                         ndfl++
 470                 }
 471         }
 472
 473         work[0] = float64(lwkopt)
 474         return unconverged
 475 }