vendor/gonum.org/v1/gonum/lapack/gonum/dgehrd.go

   1 // Copyright ©2016 The Gonum Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package gonum
   6
   7 import (
   8         "gonum.org/v1/gonum/blas"
   9         "gonum.org/v1/gonum/blas/blas64"
  10         "gonum.org/v1/gonum/lapack"
  11 )
  12
  13 // Dgehrd reduces a block of a real n×n general matrix A to upper Hessenberg
  14 // form H by an orthogonal similarity transformation Q^T * A * Q = H.
  15 //
  16 // The matrix Q is represented as a product of (ihi-ilo) elementary
  17 // reflectors
  18 //  Q = H_{ilo} H_{ilo+1} ... H_{ihi-1}.
  19 // Each H_i has the form
  20 //  H_i = I - tau[i] * v * v^T
  21 // where v is a real vector with v[0:i+1] = 0, v[i+1] = 1 and v[ihi+1:n] = 0.
  22 // v[i+2:ihi+1] is stored on exit in A[i+2:ihi+1,i].
  23 //
  24 // On entry, a contains the n×n general matrix to be reduced. On return, the
  25 // upper triangle and the first subdiagonal of A will be overwritten with the
  26 // upper Hessenberg matrix H, and the elements below the first subdiagonal, with
  27 // the slice tau, represent the orthogonal matrix Q as a product of elementary
  28 // reflectors.
  29 //
  30 // The contents of a are illustrated by the following example, with n = 7, ilo =
  31 // 1 and ihi = 5.
  32 // On entry,
  33 //  [ a   a   a   a   a   a   a ]
  34 //  [     a   a   a   a   a   a ]
  35 //  [     a   a   a   a   a   a ]
  36 //  [     a   a   a   a   a   a ]
  37 //  [     a   a   a   a   a   a ]
  38 //  [     a   a   a   a   a   a ]
  39 //  [                         a ]
  40 // on return,
  41 //  [ a   a   h   h   h   h   a ]
  42 //  [     a   h   h   h   h   a ]
  43 //  [     h   h   h   h   h   h ]
  44 //  [     v1  h   h   h   h   h ]
  45 //  [     v1  v2  h   h   h   h ]
  46 //  [     v1  v2  v3  h   h   h ]
  47 //  [                         a ]
  48 // where a denotes an element of the original matrix A, h denotes a
  49 // modified element of the upper Hessenberg matrix H, and vi denotes an
  50 // element of the vector defining H_i.
  51 //
  52 // ilo and ihi determine the block of A that will be reduced to upper Hessenberg
  53 // form. It must hold that 0 <= ilo <= ihi < n if n > 0, and ilo == 0 and ihi ==
  54 // -1 if n == 0, otherwise Dgehrd will panic.
  55 //
  56 // On return, tau will contain the scalar factors of the elementary reflectors.
  57 // Elements tau[:ilo] and tau[ihi:] will be set to zero. tau must have length
  58 // equal to n-1 if n > 0, otherwise Dgehrd will panic.
  59 //
  60 // work must have length at least lwork and lwork must be at least max(1,n),
  61 // otherwise Dgehrd will panic. On return, work[0] contains the optimal value of
  62 // lwork.
  63 //
  64 // If lwork == -1, instead of performing Dgehrd, only the optimal value of lwork
  65 // will be stored in work[0].
  66 //
  67 // Dgehrd is an internal routine. It is exported for testing purposes.
  68 func (impl Implementation) Dgehrd(n, ilo, ihi int, a []float64, lda int, tau, work []float64, lwork int) {
  69         switch {
  70         case ilo < 0 || max(0, n-1) < ilo:
  71                 panic(badIlo)
  72         case ihi < min(ilo, n-1) || n <= ihi:
  73                 panic(badIhi)
  74         case lwork < max(1, n) && lwork != -1:
  75                 panic(badWork)
  76         case len(work) < lwork:
  77                 panic(shortWork)
  78         }
  79         if lwork != -1 {
  80                 checkMatrix(n, n, a, lda)
  81                 if len(tau) != n-1 && n > 0 {
  82                         panic(badTau)
  83                 }
  84         }
  85
  86         const (
  87                 nbmax = 64
  88                 ldt   = nbmax + 1
  89                 tsize = ldt * nbmax
  90         )
  91         // Compute the workspace requirements.
  92         nb := min(nbmax, impl.Ilaenv(1, "DGEHRD", " ", n, ilo, ihi, -1))
  93         lwkopt := n*nb + tsize
  94         if lwork == -1 {
  95                 work[0] = float64(lwkopt)
  96                 return
  97         }
  98
  99         // Set tau[:ilo] and tau[ihi:] to zero.
 100         for i := 0; i < ilo; i++ {
 101                 tau[i] = 0
 102         }
 103         for i := ihi; i < n-1; i++ {
 104                 tau[i] = 0
 105         }
 106
 107         // Quick return if possible.
 108         nh := ihi - ilo + 1
 109         if nh <= 1 {
 110                 work[0] = 1
 111                 return
 112         }
 113
 114         // Determine the block size.
 115         nbmin := 2
 116         var nx int
 117         if 1 < nb && nb < nh {
 118                 // Determine when to cross over from blocked to unblocked code
 119                 // (last block is always handled by unblocked code).
 120                 nx = max(nb, impl.Ilaenv(3, "DGEHRD", " ", n, ilo, ihi, -1))
 121                 if nx < nh {
 122                         // Determine if workspace is large enough for blocked code.
 123                         if lwork < n*nb+tsize {
 124                                 // Not enough workspace to use optimal nb:
 125                                 // determine the minimum value of nb, and reduce
 126                                 // nb or force use of unblocked code.
 127                                 nbmin = max(2, impl.Ilaenv(2, "DGEHRD", " ", n, ilo, ihi, -1))
 128                                 if lwork >= n*nbmin+tsize {
 129                                         nb = (lwork - tsize) / n
 130                                 } else {
 131                                         nb = 1
 132                                 }
 133                         }
 134                 }
 135         }
 136         ldwork := nb // work is used as an n×nb matrix.
 137
 138         var i int
 139         if nb < nbmin || nh <= nb {
 140                 // Use unblocked code below.
 141                 i = ilo
 142         } else {
 143                 // Use blocked code.
 144                 bi := blas64.Implementation()
 145                 iwt := n * nb // Size of the matrix Y and index where the matrix T starts in work.
 146                 for i = ilo; i < ihi-nx; i += nb {
 147                         ib := min(nb, ihi-i)
 148
 149                         // Reduce columns [i:i+ib] to Hessenberg form, returning the
 150                         // matrices V and T of the block reflector H = I - V*T*V^T
 151                         // which performs the reduction, and also the matrix Y = A*V*T.
 152                         impl.Dlahr2(ihi+1, i+1, ib, a[i:], lda, tau[i:], work[iwt:], ldt, work, ldwork)
 153
 154                         // Apply the block reflector H to A[:ihi+1,i+ib:ihi+1] from the
 155                         // right, computing  A := A - Y * V^T. V[i+ib,i+ib-1] must be set
 156                         // to 1.
 157                         ei := a[(i+ib)*lda+i+ib-1]
 158                         a[(i+ib)*lda+i+ib-1] = 1
 159                         bi.Dgemm(blas.NoTrans, blas.Trans, ihi+1, ihi-i-ib+1, ib,
 160                                 -1, work, ldwork,
 161                                 a[(i+ib)*lda+i:], lda,
 162                                 1, a[i+ib:], lda)
 163                         a[(i+ib)*lda+i+ib-1] = ei
 164
 165                         // Apply the block reflector H to A[0:i+1,i+1:i+ib-1] from the
 166                         // right.
 167                         bi.Dtrmm(blas.Right, blas.Lower, blas.Trans, blas.Unit, i+1, ib-1,
 168                                 1, a[(i+1)*lda+i:], lda, work, ldwork)
 169                         for j := 0; j <= ib-2; j++ {
 170                                 bi.Daxpy(i+1, -1, work[j:], ldwork, a[i+j+1:], lda)
 171                         }
 172
 173                         // Apply the block reflector H to A[i+1:ihi+1,i+ib:n] from the
 174                         // left.
 175                         impl.Dlarfb(blas.Left, blas.Trans, lapack.Forward, lapack.ColumnWise,
 176                                 ihi-i, n-i-ib, ib,
 177                                 a[(i+1)*lda+i:], lda, work[iwt:], ldt, a[(i+1)*lda+i+ib:], lda, work, ldwork)
 178                 }
 179         }
 180         // Use unblocked code to reduce the rest of the matrix.
 181         impl.Dgehd2(n, i, ihi, a, lda, tau, work)
 182         work[0] = float64(lwkopt)
 183 }