vendor/gonum.org/v1/gonum/lapack/gonum/dgeqp3.go

   1 // Copyright ©2017 The Gonum Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package gonum
   6
   7 import (
   8         "gonum.org/v1/gonum/blas"
   9         "gonum.org/v1/gonum/blas/blas64"
  10 )
  11
  12 // Dgeqp3 computes a QR factorization with column pivoting of the
  13 // m×n matrix A: A*P = Q*R using Level 3 BLAS.
  14 //
  15 // The matrix Q is represented as a product of elementary reflectors
  16 //  Q = H_0 H_1 . . . H_{k-1}, where k = min(m,n).
  17 // Each H_i has the form
  18 //  H_i = I - tau * v * v^T
  19 // where tau and v are real vectors with v[0:i-1] = 0 and v[i] = 1;
  20 // v[i:m] is stored on exit in A[i:m, i], and tau in tau[i].
  21 //
  22 // jpvt specifies a column pivot to be applied to A. If
  23 // jpvt[j] is at least zero, the jth column of A is permuted
  24 // to the front of A*P (a leading column), if jpvt[j] is -1
  25 // the jth column of A is a free column. If jpvt[j] < -1, Dgeqp3
  26 // will panic. On return, jpvt holds the permutation that was
  27 // applied; the jth column of A*P was the jpvt[j] column of A.
  28 // jpvt must have length n or Dgeqp3 will panic.
  29 //
  30 // tau holds the scalar factors of the elementary reflectors.
  31 // It must have length min(m, n), otherwise Dgeqp3 will panic.
  32 //
  33 // work must have length at least max(1,lwork), and lwork must be at least
  34 // 3*n+1, otherwise Dgeqp3 will panic. For optimal performance lwork must
  35 // be at least 2*n+(n+1)*nb, where nb is the optimal blocksize. On return,
  36 // work[0] will contain the optimal value of lwork.
  37 //
  38 // If lwork == -1, instead of performing Dgeqp3, only the optimal value of lwork
  39 // will be stored in work[0].
  40 //
  41 // Dgeqp3 is an internal routine. It is exported for testing purposes.
  42 func (impl Implementation) Dgeqp3(m, n int, a []float64, lda int, jpvt []int, tau, work []float64, lwork int) {
  43         const (
  44                 inb    = 1
  45                 inbmin = 2
  46                 ixover = 3
  47         )
  48         checkMatrix(m, n, a, lda)
  49
  50         if len(jpvt) != n {
  51                 panic(badIpiv)
  52         }
  53         for _, v := range jpvt {
  54                 if v < -1 || n <= v {
  55                         panic("lapack: jpvt element out of range")
  56                 }
  57         }
  58         minmn := min(m, n)
  59         if len(work) < max(1, lwork) {
  60                 panic(badWork)
  61         }
  62
  63         var iws, lwkopt, nb int
  64         if minmn == 0 {
  65                 iws = 1
  66                 lwkopt = 1
  67         } else {
  68                 iws = 3*n + 1
  69                 nb = impl.Ilaenv(inb, "DGEQRF", " ", m, n, -1, -1)
  70                 lwkopt = 2*n + (n+1)*nb
  71         }
  72         work[0] = float64(lwkopt)
  73
  74         if lwork == -1 {
  75                 return
  76         }
  77
  78         if len(tau) < minmn {
  79                 panic(badTau)
  80         }
  81
  82         bi := blas64.Implementation()
  83
  84         // Move initial columns up front.
  85         var nfxd int
  86         for j := 0; j < n; j++ {
  87                 if jpvt[j] == -1 {
  88                         jpvt[j] = j
  89                         continue
  90                 }
  91                 if j != nfxd {
  92                         bi.Dswap(m, a[j:], lda, a[nfxd:], lda)
  93                         jpvt[j], jpvt[nfxd] = jpvt[nfxd], j
  94                 } else {
  95                         jpvt[j] = j
  96                 }
  97                 nfxd++
  98         }
  99
 100         // Factorize nfxd columns.
 101         //
 102         // Compute the QR factorization of nfxd columns and update remaining columns.
 103         if nfxd > 0 {
 104                 na := min(m, nfxd)
 105                 impl.Dgeqrf(m, na, a, lda, tau, work, lwork)
 106                 iws = max(iws, int(work[0]))
 107                 if na < n {
 108                         impl.Dormqr(blas.Left, blas.Trans, m, n-na, na, a, lda, tau[:na], a[na:], lda,
 109                                 work, lwork)
 110                         iws = max(iws, int(work[0]))
 111                 }
 112         }
 113
 114         if nfxd >= minmn {
 115                 work[0] = float64(iws)
 116                 return
 117         }
 118
 119         // Factorize free columns.
 120         sm := m - nfxd
 121         sn := n - nfxd
 122         sminmn := minmn - nfxd
 123
 124         // Determine the block size.
 125         nb = impl.Ilaenv(inb, "DGEQRF", " ", sm, sn, -1, -1)
 126         nbmin := 2
 127         nx := 0
 128
 129         if 1 < nb && nb < sminmn {
 130                 // Determine when to cross over from blocked to unblocked code.
 131                 nx = max(0, impl.Ilaenv(ixover, "DGEQRF", " ", sm, sn, -1, -1))
 132
 133                 if nx < sminmn {
 134                         // Determine if workspace is large enough for blocked code.
 135                         minws := 2*sn + (sn+1)*nb
 136                         iws = max(iws, minws)
 137                         if lwork < minws {
 138                                 // Not enough workspace to use optimal nb. Reduce
 139                                 // nb and determine the minimum value of nb.
 140                                 nb = (lwork - 2*sn) / (sn + 1)
 141                                 nbmin = max(2, impl.Ilaenv(inbmin, "DGEQRF", " ", sm, sn, -1, -1))
 142                         }
 143                 }
 144         }
 145
 146         // Initialize partial column norms.
 147         // The first n elements of work store the exact column norms.
 148         for j := nfxd; j < n; j++ {
 149                 work[j] = bi.Dnrm2(sm, a[nfxd*lda+j:], lda)
 150                 work[n+j] = work[j]
 151         }
 152         j := nfxd
 153         if nbmin <= nb && nb < sminmn && nx < sminmn {
 154                 // Use blocked code initially.
 155
 156                 // Compute factorization.
 157                 var fjb int
 158                 for topbmn := minmn - nx; j < topbmn; j += fjb {
 159                         jb := min(nb, topbmn-j)
 160
 161                         // Factorize jb columns among columns j:n.
 162                         fjb = impl.Dlaqps(m, n-j, j, jb, a[j:], lda, jpvt[j:], tau[j:],
 163                                 work[j:n], work[j+n:2*n], work[2*n:2*n+jb], work[2*n+jb:], jb)
 164                 }
 165         }
 166
 167         // Use unblocked code to factor the last or only block.
 168         if j < minmn {
 169                 impl.Dlaqp2(m, n-j, j, a[j:], lda, jpvt[j:], tau[j:],
 170                         work[j:n], work[j+n:2*n], work[2*n:])
 171         }
 172
 173         work[0] = float64(iws)
 174 }