--- /dev/null
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+ "math"
+
+ "gonum.org/v1/gonum/blas"
+ "gonum.org/v1/gonum/blas/blas64"
+ "gonum.org/v1/gonum/lapack"
+)
+
+const noSVDO = "dgesvd: not coded for overwrite"
+
+// Dgesvd computes the singular value decomposition of the input matrix A.
+//
+// The singular value decomposition is
+// A = U * Sigma * V^T
+// where Sigma is an m×n diagonal matrix containing the singular values of A,
+// U is an m×m orthogonal matrix and V is an n×n orthogonal matrix. The first
+// min(m,n) columns of U and V are the left and right singular vectors of A
+// respectively.
+//
+// jobU and jobVT are options for computing the singular vectors. The behavior
+// is as follows
+// jobU == lapack.SVDAll All m columns of U are returned in u
+// jobU == lapack.SVDInPlace The first min(m,n) columns are returned in u
+// jobU == lapack.SVDOverwrite The first min(m,n) columns of U are written into a
+// jobU == lapack.SVDNone The columns of U are not computed.
+// The behavior is the same for jobVT and the rows of V^T. At most one of jobU
+// and jobVT can equal lapack.SVDOverwrite, and Dgesvd will panic otherwise.
+//
+// On entry, a contains the data for the m×n matrix A. During the call to Dgesvd
+// the data is overwritten. On exit, A contains the appropriate singular vectors
+// if either job is lapack.SVDOverwrite.
+//
+// s is a slice of length at least min(m,n) and on exit contains the singular
+// values in decreasing order.
+//
+// u contains the left singular vectors on exit, stored column-wise. If
+// jobU == lapack.SVDAll, u is of size m×m. If jobU == lapack.SVDInPlace u is
+// of size m×min(m,n). If jobU == lapack.SVDOverwrite or lapack.SVDNone, u is
+// not used.
+//
+// vt contains the left singular vectors on exit, stored row-wise. If
+// jobV == lapack.SVDAll, vt is of size n×m. If jobVT == lapack.SVDInPlace vt is
+// of size min(m,n)×n. If jobVT == lapack.SVDOverwrite or lapack.SVDNone, vt is
+// not used.
+//
+// work is a slice for storing temporary memory, and lwork is the usable size of
+// the slice. lwork must be at least max(5*min(m,n), 3*min(m,n)+max(m,n)).
+// If lwork == -1, instead of performing Dgesvd, the optimal work length will be
+// stored into work[0]. Dgesvd will panic if the working memory has insufficient
+// storage.
+//
+// Dgesvd returns whether the decomposition successfully completed.
+func (impl Implementation) Dgesvd(jobU, jobVT lapack.SVDJob, m, n int, a []float64, lda int, s, u []float64, ldu int, vt []float64, ldvt int, work []float64, lwork int) (ok bool) {
+ minmn := min(m, n)
+ checkMatrix(m, n, a, lda)
+ if jobU == lapack.SVDAll {
+ checkMatrix(m, m, u, ldu)
+ } else if jobU == lapack.SVDInPlace {
+ checkMatrix(m, minmn, u, ldu)
+ }
+ if jobVT == lapack.SVDAll {
+ checkMatrix(n, n, vt, ldvt)
+ } else if jobVT == lapack.SVDInPlace {
+ checkMatrix(minmn, n, vt, ldvt)
+ }
+ if jobU == lapack.SVDOverwrite && jobVT == lapack.SVDOverwrite {
+ panic("lapack: both jobU and jobVT are lapack.SVDOverwrite")
+ }
+ if len(s) < minmn {
+ panic(badS)
+ }
+ if jobU == lapack.SVDOverwrite || jobVT == lapack.SVDOverwrite {
+ panic(noSVDO)
+ }
+ if m == 0 || n == 0 {
+ return true
+ }
+
+ wantua := jobU == lapack.SVDAll
+ wantus := jobU == lapack.SVDInPlace
+ wantuas := wantua || wantus
+ wantuo := jobU == lapack.SVDOverwrite
+ wantun := jobU == lapack.None
+
+ wantva := jobVT == lapack.SVDAll
+ wantvs := jobVT == lapack.SVDInPlace
+ wantvas := wantva || wantvs
+ wantvo := jobVT == lapack.SVDOverwrite
+ wantvn := jobVT == lapack.None
+
+ bi := blas64.Implementation()
+ var mnthr int
+
+ // Compute optimal space for subroutines.
+ maxwrk := 1
+ opts := string(jobU) + string(jobVT)
+ var wrkbl, bdspac int
+ if m >= n {
+ mnthr = impl.Ilaenv(6, "DGESVD", opts, m, n, 0, 0)
+ bdspac = 5 * n
+ impl.Dgeqrf(m, n, a, lda, nil, work, -1)
+ lwork_dgeqrf := int(work[0])
+ impl.Dorgqr(m, n, n, a, lda, nil, work, -1)
+ lwork_dorgqr_n := int(work[0])
+ impl.Dorgqr(m, m, n, a, lda, nil, work, -1)
+ lwork_dorgqr_m := int(work[0])
+ impl.Dgebrd(n, n, a, lda, s, nil, nil, nil, work, -1)
+ lwork_dgebrd := int(work[0])
+ impl.Dorgbr(lapack.ApplyP, n, n, n, a, lda, nil, work, -1)
+ lwork_dorgbr_p := int(work[0])
+ impl.Dorgbr(lapack.ApplyQ, n, n, n, a, lda, nil, work, -1)
+ lwork_dorgbr_q := int(work[0])
+
+ if m >= mnthr {
+ // m >> n
+ if wantun {
+ // Path 1
+ maxwrk = n + lwork_dgeqrf
+ maxwrk = max(maxwrk, 3*n+lwork_dgebrd)
+ if wantvo || wantvas {
+ maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p)
+ }
+ maxwrk = max(maxwrk, bdspac)
+ } else if wantuo && wantvn {
+ // Path 2
+ wrkbl = n + lwork_dgeqrf
+ wrkbl = max(wrkbl, n+lwork_dorgqr_n)
+ wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
+ wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
+ wrkbl = max(wrkbl, bdspac)
+ maxwrk = max(n*n+wrkbl, n*n+m*n+n)
+ } else if wantuo && wantvs {
+ // Path 3
+ wrkbl = n + lwork_dgeqrf
+ wrkbl = max(wrkbl, n+lwork_dorgqr_n)
+ wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
+ wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
+ wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
+ wrkbl = max(wrkbl, bdspac)
+ maxwrk = max(n*n+wrkbl, n*n+m*n+n)
+ } else if wantus && wantvn {
+ // Path 4
+ wrkbl = n + lwork_dgeqrf
+ wrkbl = max(wrkbl, n+lwork_dorgqr_n)
+ wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
+ wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
+ wrkbl = max(wrkbl, bdspac)
+ maxwrk = n*n + wrkbl
+ } else if wantus && wantvo {
+ // Path 5
+ wrkbl = n + lwork_dgeqrf
+ wrkbl = max(wrkbl, n+lwork_dorgqr_n)
+ wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
+ wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
+ wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
+ wrkbl = max(wrkbl, bdspac)
+ maxwrk = 2*n*n + wrkbl
+ } else if wantus && wantvas {
+ // Path 6
+ wrkbl = n + lwork_dgeqrf
+ wrkbl = max(wrkbl, n+lwork_dorgqr_n)
+ wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
+ wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
+ wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
+ wrkbl = max(wrkbl, bdspac)
+ maxwrk = n*n + wrkbl
+ } else if wantua && wantvn {
+ // Path 7
+ wrkbl = n + lwork_dgeqrf
+ wrkbl = max(wrkbl, n+lwork_dorgqr_m)
+ wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
+ wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
+ wrkbl = max(wrkbl, bdspac)
+ maxwrk = n*n + wrkbl
+ } else if wantua && wantvo {
+ // Path 8
+ wrkbl = n + lwork_dgeqrf
+ wrkbl = max(wrkbl, n+lwork_dorgqr_m)
+ wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
+ wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
+ wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
+ wrkbl = max(wrkbl, bdspac)
+ maxwrk = 2*n*n + wrkbl
+ } else if wantua && wantvas {
+ // Path 9
+ wrkbl = n + lwork_dgeqrf
+ wrkbl = max(wrkbl, n+lwork_dorgqr_m)
+ wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
+ wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
+ wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
+ wrkbl = max(wrkbl, bdspac)
+ maxwrk = n*n + wrkbl
+ }
+ } else {
+ // Path 10: m > n
+ impl.Dgebrd(m, n, a, lda, s, nil, nil, nil, work, -1)
+ lwork_dgebrd := int(work[0])
+ maxwrk = 3*n + lwork_dgebrd
+ if wantus || wantuo {
+ impl.Dorgbr(lapack.ApplyQ, m, n, n, a, lda, nil, work, -1)
+ lwork_dorgbr_q = int(work[0])
+ maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q)
+ }
+ if wantua {
+ impl.Dorgbr(lapack.ApplyQ, m, m, n, a, lda, nil, work, -1)
+ lwork_dorgbr_q := int(work[0])
+ maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q)
+ }
+ if !wantvn {
+ maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p)
+ }
+ maxwrk = max(maxwrk, bdspac)
+ }
+ } else {
+ mnthr = impl.Ilaenv(6, "DGESVD", opts, m, n, 0, 0)
+
+ bdspac = 5 * m
+ impl.Dgelqf(m, n, a, lda, nil, work, -1)
+ lwork_dgelqf := int(work[0])
+ impl.Dorglq(n, n, m, nil, n, nil, work, -1)
+ lwork_dorglq_n := int(work[0])
+ impl.Dorglq(m, n, m, a, lda, nil, work, -1)
+ lwork_dorglq_m := int(work[0])
+ impl.Dgebrd(m, m, a, lda, s, nil, nil, nil, work, -1)
+ lwork_dgebrd := int(work[0])
+ impl.Dorgbr(lapack.ApplyP, m, m, m, a, n, nil, work, -1)
+ lwork_dorgbr_p := int(work[0])
+ impl.Dorgbr(lapack.ApplyQ, m, m, m, a, n, nil, work, -1)
+ lwork_dorgbr_q := int(work[0])
+ if n >= mnthr {
+ // n >> m
+ if wantvn {
+ // Path 1t
+ maxwrk = m + lwork_dgelqf
+ maxwrk = max(maxwrk, 3*m+lwork_dgebrd)
+ if wantuo || wantuas {
+ maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q)
+ }
+ maxwrk = max(maxwrk, bdspac)
+ } else if wantvo && wantun {
+ // Path 2t
+ wrkbl = m + lwork_dgelqf
+ wrkbl = max(wrkbl, m+lwork_dorglq_m)
+ wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
+ wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
+ wrkbl = max(wrkbl, bdspac)
+ maxwrk = max(m*m+wrkbl, m*m+m*n+m)
+ } else if wantvo && wantuas {
+ // Path 3t
+ wrkbl = m + lwork_dgelqf
+ wrkbl = max(wrkbl, m+lwork_dorglq_m)
+ wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
+ wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
+ wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
+ wrkbl = max(wrkbl, bdspac)
+ maxwrk = max(m*m+wrkbl, m*m+m*n+m)
+ } else if wantvs && wantun {
+ // Path 4t
+ wrkbl = m + lwork_dgelqf
+ wrkbl = max(wrkbl, m+lwork_dorglq_m)
+ wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
+ wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
+ wrkbl = max(wrkbl, bdspac)
+ maxwrk = m*m + wrkbl
+ } else if wantvs && wantuo {
+ // Path 5t
+ wrkbl = m + lwork_dgelqf
+ wrkbl = max(wrkbl, m+lwork_dorglq_m)
+ wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
+ wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
+ wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
+ wrkbl = max(wrkbl, bdspac)
+ maxwrk = 2*m*m + wrkbl
+ } else if wantvs && wantuas {
+ // Path 6t
+ wrkbl = m + lwork_dgelqf
+ wrkbl = max(wrkbl, m+lwork_dorglq_m)
+ wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
+ wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
+ wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
+ wrkbl = max(wrkbl, bdspac)
+ maxwrk = m*m + wrkbl
+ } else if wantva && wantun {
+ // Path 7t
+ wrkbl = m + lwork_dgelqf
+ wrkbl = max(wrkbl, m+lwork_dorglq_n)
+ wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
+ wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
+ wrkbl = max(wrkbl, bdspac)
+ maxwrk = m*m + wrkbl
+ } else if wantva && wantuo {
+ // Path 8t
+ wrkbl = m + lwork_dgelqf
+ wrkbl = max(wrkbl, m+lwork_dorglq_n)
+ wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
+ wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
+ wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
+ wrkbl = max(wrkbl, bdspac)
+ maxwrk = 2*m*m + wrkbl
+ } else if wantva && wantuas {
+ // Path 9t
+ wrkbl = m + lwork_dgelqf
+ wrkbl = max(wrkbl, m+lwork_dorglq_n)
+ wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
+ wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
+ wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
+ wrkbl = max(wrkbl, bdspac)
+ maxwrk = m*m + wrkbl
+ }
+ } else {
+ // Path 10t, n > m
+ impl.Dgebrd(m, n, a, lda, s, nil, nil, nil, work, -1)
+ lwork_dgebrd = int(work[0])
+ maxwrk = 3*m + lwork_dgebrd
+ if wantvs || wantvo {
+ impl.Dorgbr(lapack.ApplyP, m, n, m, a, n, nil, work, -1)
+ lwork_dorgbr_p = int(work[0])
+ maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p)
+ }
+ if wantva {
+ impl.Dorgbr(lapack.ApplyP, n, n, m, a, n, nil, work, -1)
+ lwork_dorgbr_p = int(work[0])
+ maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p)
+ }
+ if !wantun {
+ maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q)
+ }
+ maxwrk = max(maxwrk, bdspac)
+ }
+ }
+
+ minWork := max(1, 5*minmn)
+ if !((wantun && m >= mnthr) || (wantvn && n >= mnthr)) {
+ minWork = max(minWork, 3*minmn+max(m, n))
+ }
+
+ if lwork != -1 {
+ if len(work) < lwork {
+ panic(badWork)
+ }
+ if lwork < minWork {
+ panic(badWork)
+ }
+ }
+ if m == 0 || n == 0 {
+ return true
+ }
+
+ maxwrk = max(maxwrk, minWork)
+ work[0] = float64(maxwrk)
+ if lwork == -1 {
+ return true
+ }
+
+ // Perform decomposition.
+ eps := dlamchE
+ smlnum := math.Sqrt(dlamchS) / eps
+ bignum := 1 / smlnum
+
+ // Scale A if max element outside range [smlnum, bignum].
+ anrm := impl.Dlange(lapack.MaxAbs, m, n, a, lda, nil)
+ var iscl bool
+ if anrm > 0 && anrm < smlnum {
+ iscl = true
+ impl.Dlascl(lapack.General, 0, 0, anrm, smlnum, m, n, a, lda)
+ } else if anrm > bignum {
+ iscl = true
+ impl.Dlascl(lapack.General, 0, 0, anrm, bignum, m, n, a, lda)
+ }
+
+ var ie int
+ if m >= n {
+ // If A has sufficiently more rows than columns, use the QR decomposition.
+ if m >= mnthr {
+ // m >> n
+ if wantun {
+ // Path 1.
+ itau := 0
+ iwork := itau + n
+
+ // Compute A = Q * R.
+ impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+
+ // Zero out below R.
+ impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
+ ie = 0
+ itauq := ie + n
+ itaup := itauq + n
+ iwork = itaup + n
+ // Bidiagonalize R in A.
+ impl.Dgebrd(n, n, a, lda, s, work[ie:], work[itauq:],
+ work[itaup:], work[iwork:], lwork-iwork)
+ ncvt := 0
+ if wantvo || wantvas {
+ // Generate P^T.
+ impl.Dorgbr(lapack.ApplyP, n, n, n, a, lda, work[itaup:],
+ work[iwork:], lwork-iwork)
+ ncvt = n
+ }
+ iwork = ie + n
+
+ // Perform bidiagonal QR iteration computing right singular vectors
+ // of A in A if desired.
+ ok = impl.Dbdsqr(blas.Upper, n, ncvt, 0, 0, s, work[ie:],
+ a, lda, work, 1, work, 1, work[iwork:])
+
+ // If right singular vectors desired in VT, copy them there.
+ if wantvas {
+ impl.Dlacpy(blas.All, n, n, a, lda, vt, ldvt)
+ }
+ } else if wantuo && wantvn {
+ // Path 2
+ panic(noSVDO)
+ } else if wantuo && wantvas {
+ // Path 3
+ panic(noSVDO)
+ } else if wantus {
+ if wantvn {
+ // Path 4
+ if lwork >= n*n+max(4*n, bdspac) {
+ // Sufficient workspace for a fast algorithm.
+ ir := 0
+ var ldworkr int
+ if lwork >= wrkbl+lda*n {
+ ldworkr = lda
+ } else {
+ ldworkr = n
+ }
+ itau := ir + ldworkr*n
+ iwork := itau + n
+ // Compute A = Q * R.
+ impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+
+ // Copy R to work[ir:], zeroing out below it.
+ impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr)
+ impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr)
+
+ // Generate Q in A.
+ impl.Dorgqr(m, n, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+ ie := itau
+ itauq := ie + n
+ itaup := itauq + n
+ iwork = itaup + n
+
+ // Bidiagonalize R in work[ir:].
+ impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:],
+ work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
+
+ // Generate left vectors bidiagonalizing R in work[ir:].
+ impl.Dorgbr(lapack.ApplyQ, n, n, n, work[ir:], ldworkr,
+ work[itauq:], work[iwork:], lwork-iwork)
+ iwork = ie + n
+
+ // Perform bidiagonal QR iteration, compuing left singular
+ // vectors of R in work[ir:].
+ ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], work, 1,
+ work[ir:], ldworkr, work, 1, work[iwork:])
+
+ // Multiply Q in A by left singular vectors of R in
+ // work[ir:], storing result in U.
+ bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda,
+ work[ir:], ldworkr, 0, u, ldu)
+ } else {
+ // Insufficient workspace for a fast algorithm.
+ itau := 0
+ iwork := itau + n
+
+ // Compute A = Q*R, copying result to U.
+ impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+ impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
+
+ // Generate Q in U.
+ impl.Dorgqr(m, n, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
+ ie := itau
+ itauq := ie + n
+ itaup := itauq + n
+ iwork = itaup + n
+
+ // Zero out below R in A.
+ impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
+
+ // Bidiagonalize R in A.
+ impl.Dgebrd(n, n, a, lda, s, work[ie:],
+ work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
+
+ // Multiply Q in U by left vectors bidiagonalizing R.
+ impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
+ a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
+ iwork = ie + n
+
+ // Perform bidiagonal QR iteration, computing left
+ // singular vectors of A in U.
+ ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:], work, 1,
+ u, ldu, work, 1, work[iwork:])
+ }
+ } else if wantvo {
+ // Path 5
+ panic(noSVDO)
+ } else if wantvas {
+ // Path 6
+ if lwork >= n*n+max(4*n, bdspac) {
+ // Sufficient workspace for a fast algorithm.
+ iu := 0
+ var ldworku int
+ if lwork >= wrkbl+lda*n {
+ ldworku = lda
+ } else {
+ ldworku = n
+ }
+ itau := iu + ldworku*n
+ iwork := itau + n
+
+ // Compute A = Q * R.
+ impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+ // Copy R to work[iu:], zeroing out below it.
+ impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku)
+ impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku)
+
+ // Generate Q in A.
+ impl.Dorgqr(m, n, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+
+ ie := itau
+ itauq := ie + n
+ itaup := itauq + n
+ iwork = itaup + n
+
+ // Bidiagonalize R in work[iu:], copying result to VT.
+ impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:],
+ work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
+ impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt)
+
+ // Generate left bidiagonalizing vectors in work[iu:].
+ impl.Dorgbr(lapack.ApplyQ, n, n, n, work[iu:], ldworku,
+ work[itauq:], work[iwork:], lwork-iwork)
+
+ // Generate right bidiagonalizing vectors in VT.
+ impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
+ work[itaup:], work[iwork:], lwork-iwork)
+ iwork = ie + n
+
+ // Perform bidiagonal QR iteration, computing left singular
+ // vectors of R in work[iu:], and computing right singular
+ // vectors of R in VT.
+ ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:],
+ vt, ldvt, work[iu:], ldworku, work, 1, work[iwork:])
+
+ // Multiply Q in A by left singular vectors of R in
+ // work[iu:], storing result in U.
+ bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda,
+ work[iu:], ldworku, 0, u, ldu)
+ } else {
+ // Insufficient workspace for a fast algorithm.
+ itau := 0
+ iwork := itau + n
+
+ // Compute A = Q * R, copying result to U.
+ impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+ impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
+
+ // Generate Q in U.
+ impl.Dorgqr(m, n, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
+
+ // Copy R to VT, zeroing out below it.
+ impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
+ impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt)
+
+ ie := itau
+ itauq := ie + n
+ itaup := itauq + n
+ iwork = itaup + n
+
+ // Bidiagonalize R in VT.
+ impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
+ work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
+
+ // Multiply Q in U by left bidiagonalizing vectors in VT.
+ impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
+ vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
+
+ // Generate right bidiagonalizing vectors in VT.
+ impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
+ work[itaup:], work[iwork:], lwork-iwork)
+ iwork = ie + n
+
+ // Perform bidiagonal QR iteration, computing left singular
+ // vectors of A in U and computing right singular vectors
+ // of A in VT.
+ ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
+ vt, ldvt, u, ldu, work, 1, work[iwork:])
+ }
+ }
+ } else if wantua {
+ if wantvn {
+ // Path 7
+ if lwork >= n*n+max(max(n+m, 4*n), bdspac) {
+ // Sufficient workspace for a fast algorithm.
+ ir := 0
+ var ldworkr int
+ if lwork >= wrkbl+lda*n {
+ ldworkr = lda
+ } else {
+ ldworkr = n
+ }
+ itau := ir + ldworkr*n
+ iwork := itau + n
+
+ // Compute A = Q*R, copying result to U.
+ impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+ impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
+
+ // Copy R to work[ir:], zeroing out below it.
+ impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr)
+ impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr)
+
+ // Generate Q in U.
+ impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
+ ie := itau
+ itauq := ie + n
+ itaup := itauq + n
+ iwork = itaup + n
+
+ // Bidiagonalize R in work[ir:].
+ impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:],
+ work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
+
+ // Generate left bidiagonalizing vectors in work[ir:].
+ impl.Dorgbr(lapack.ApplyQ, n, n, n, work[ir:], ldworkr,
+ work[itauq:], work[iwork:], lwork-iwork)
+ iwork = ie + n
+
+ // Perform bidiagonal QR iteration, computing left singular
+ // vectors of R in work[ir:].
+ ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], work, 1,
+ work[ir:], ldworkr, work, 1, work[iwork:])
+
+ // Multiply Q in U by left singular vectors of R in
+ // work[ir:], storing result in A.
+ bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, u, ldu,
+ work[ir:], ldworkr, 0, a, lda)
+
+ // Copy left singular vectors of A from A to U.
+ impl.Dlacpy(blas.All, m, n, a, lda, u, ldu)
+ } else {
+ // Insufficient workspace for a fast algorithm.
+ itau := 0
+ iwork := itau + n
+
+ // Compute A = Q*R, copying result to U.
+ impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+ impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
+
+ // Generate Q in U.
+ impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
+ ie := itau
+ itauq := ie + n
+ itaup := itauq + n
+ iwork = itaup + n
+
+ // Zero out below R in A.
+ impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
+
+ // Bidiagonalize R in A.
+ impl.Dgebrd(n, n, a, lda, s, work[ie:],
+ work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
+
+ // Multiply Q in U by left bidiagonalizing vectors in A.
+ impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
+ a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
+ iwork = ie + n
+
+ // Perform bidiagonal QR iteration, computing left
+ // singular vectors of A in U.
+ ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:],
+ work, 1, u, ldu, work, 1, work[iwork:])
+ }
+ } else if wantvo {
+ // Path 8.
+ panic(noSVDO)
+ } else if wantvas {
+ // Path 9.
+ if lwork >= n*n+max(max(n+m, 4*n), bdspac) {
+ // Sufficient workspace for a fast algorithm.
+ iu := 0
+ var ldworku int
+ if lwork >= wrkbl+lda*n {
+ ldworku = lda
+ } else {
+ ldworku = n
+ }
+ itau := iu + ldworku*n
+ iwork := itau + n
+
+ // Compute A = Q * R, copying result to U.
+ impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+ impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
+
+ // Generate Q in U.
+ impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
+
+ // Copy R to work[iu:], zeroing out below it.
+ impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku)
+ impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku)
+
+ ie = itau
+ itauq := ie + n
+ itaup := itauq + n
+ iwork = itaup + n
+
+ // Bidiagonalize R in work[iu:], copying result to VT.
+ impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:],
+ work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
+ impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt)
+
+ // Generate left bidiagonalizing vectors in work[iu:].
+ impl.Dorgbr(lapack.ApplyQ, n, n, n, work[iu:], ldworku,
+ work[itauq:], work[iwork:], lwork-iwork)
+
+ // Generate right bidiagonalizing vectors in VT.
+ impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
+ work[itaup:], work[iwork:], lwork-iwork)
+ iwork = ie + n
+
+ // Perform bidiagonal QR iteration, computing left singular
+ // vectors of R in work[iu:] and computing right
+ // singular vectors of R in VT.
+ ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:],
+ vt, ldvt, work[iu:], ldworku, work, 1, work[iwork:])
+
+ // Multiply Q in U by left singular vectors of R in
+ // work[iu:], storing result in A.
+ bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1,
+ u, ldu, work[iu:], ldworku, 0, a, lda)
+
+ // Copy left singular vectors of A from A to U.
+ impl.Dlacpy(blas.All, m, n, a, lda, u, ldu)
+
+ /*
+ // Bidiagonalize R in VT.
+ impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
+ work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
+
+ // Multiply Q in U by left bidiagonalizing vectors in VT.
+ impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans,
+ m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
+
+ // Generate right bidiagonalizing vectors in VT.
+ impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
+ work[itaup:], work[iwork:], lwork-iwork)
+ iwork = ie + n
+
+ // Perform bidiagonal QR iteration, computing left singular
+ // vectors of A in U and computing right singular vectors
+ // of A in VT.
+ ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
+ vt, ldvt, u, ldu, work, 1, work[iwork:])
+ */
+ } else {
+ // Insufficient workspace for a fast algorithm.
+ itau := 0
+ iwork := itau + n
+
+ // Compute A = Q*R, copying result to U.
+ impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+ impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
+
+ // Generate Q in U.
+ impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
+
+ // Copy R from A to VT, zeroing out below it.
+ impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
+ impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt)
+
+ ie := itau
+ itauq := ie + n
+ itaup := itauq + n
+ iwork = itaup + n
+
+ // Bidiagonalize R in VT.
+ impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
+ work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
+
+ // Multiply Q in U by left bidiagonalizing vectors in VT.
+ impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans,
+ m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
+
+ // Generate right bidiagonizing vectors in VT.
+ impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
+ work[itaup:], work[iwork:], lwork-iwork)
+ iwork = ie + n
+
+ // Perform bidiagonal QR iteration, computing left singular
+ // vectors of A in U and computing right singular vectors
+ // of A in VT.
+ impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
+ vt, ldvt, u, ldu, work, 1, work[iwork:])
+ }
+ }
+ }
+ } else {
+ // Path 10.
+ // M at least N, but not much larger.
+ ie = 0
+ itauq := ie + n
+ itaup := itauq + n
+ iwork := itaup + n
+
+ // Bidiagonalize A.
+ impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:],
+ work[itaup:], work[iwork:], lwork-iwork)
+ if wantuas {
+ // Left singular vectors are desired in U. Copy result to U and
+ // generate left biadiagonalizing vectors in U.
+ impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
+ var ncu int
+ if wantus {
+ ncu = n
+ }
+ if wantua {
+ ncu = m
+ }
+ impl.Dorgbr(lapack.ApplyQ, m, ncu, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
+ }
+ if wantvas {
+ // Right singular vectors are desired in VT. Copy result to VT and
+ // generate left biadiagonalizing vectors in VT.
+ impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
+ impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork)
+ }
+ if wantuo {
+ panic(noSVDO)
+ }
+ if wantvo {
+ panic(noSVDO)
+ }
+ iwork = ie + n
+ var nru, ncvt int
+ if wantuas || wantuo {
+ nru = m
+ }
+ if wantun {
+ nru = 0
+ }
+ if wantvas || wantvo {
+ ncvt = n
+ }
+ if wantvn {
+ ncvt = 0
+ }
+ if !wantuo && !wantvo {
+ // Perform bidiagonal QR iteration, if desired, computing left
+ // singular vectors in U and right singular vectors in VT.
+ ok = impl.Dbdsqr(blas.Upper, n, ncvt, nru, 0, s, work[ie:],
+ vt, ldvt, u, ldu, work, 1, work[iwork:])
+ } else {
+ // There will be two branches when the implementation is complete.
+ panic(noSVDO)
+ }
+ }
+ } else {
+ // A has more columns than rows. If A has sufficiently more columns than
+ // rows, first reduce using the LQ decomposition.
+ if n >= mnthr {
+ // n >> m.
+ if wantvn {
+ // Path 1t.
+ itau := 0
+ iwork := itau + m
+
+ // Compute A = L*Q.
+ impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+
+ // Zero out above L.
+ impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
+ ie := 0
+ itauq := ie + m
+ itaup := itauq + m
+ iwork = itaup + m
+
+ // Bidiagonalize L in A.
+ impl.Dgebrd(m, m, a, lda, s, work[ie:itauq],
+ work[itauq:itaup], work[itaup:iwork], work[iwork:], lwork-iwork)
+ if wantuo || wantuas {
+ impl.Dorgbr(lapack.ApplyQ, m, m, m, a, lda,
+ work[itauq:], work[iwork:], lwork-iwork)
+ }
+ iwork = ie + m
+ nru := 0
+ if wantuo || wantuas {
+ nru = m
+ }
+
+ // Perform bidiagonal QR iteration, computing left singular vectors
+ // of A in A if desired.
+ ok = impl.Dbdsqr(blas.Upper, m, 0, nru, 0, s, work[ie:],
+ work, 1, a, lda, work, 1, work[iwork:])
+
+ // If left singular vectors desired in U, copy them there.
+ if wantuas {
+ impl.Dlacpy(blas.All, m, m, a, lda, u, ldu)
+ }
+ } else if wantvo && wantun {
+ // Path 2t.
+ panic(noSVDO)
+ } else if wantvo && wantuas {
+ // Path 3t.
+ panic(noSVDO)
+ } else if wantvs {
+ if wantun {
+ // Path 4t.
+ if lwork >= m*m+max(4*m, bdspac) {
+ // Sufficient workspace for a fast algorithm.
+ ir := 0
+ var ldworkr int
+ if lwork >= wrkbl+lda*m {
+ ldworkr = lda
+ } else {
+ ldworkr = m
+ }
+ itau := ir + ldworkr*m
+ iwork := itau + m
+
+ // Compute A = L*Q.
+ impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+
+ // Copy L to work[ir:], zeroing out above it.
+ impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr)
+ impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr)
+
+ // Generate Q in A.
+ impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork)
+ ie := itau
+ itauq := ie + m
+ itaup := itauq + m
+ iwork = itaup + m
+
+ // Bidiagonalize L in work[ir:].
+ impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:],
+ work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
+
+ // Generate right vectors bidiagonalizing L in work[ir:].
+ impl.Dorgbr(lapack.ApplyP, m, m, m, work[ir:], ldworkr,
+ work[itaup:], work[iwork:], lwork-iwork)
+ iwork = ie + m
+
+ // Perform bidiagonal QR iteration, computing right singular
+ // vectors of L in work[ir:].
+ ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:],
+ work[ir:], ldworkr, work, 1, work, 1, work[iwork:])
+
+ // Multiply right singular vectors of L in work[ir:] by
+ // Q in A, storing result in VT.
+ bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
+ work[ir:], ldworkr, a, lda, 0, vt, ldvt)
+ } else {
+ // Insufficient workspace for a fast algorithm.
+ itau := 0
+ iwork := itau + m
+
+ // Compute A = L*Q.
+ impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+
+ // Copy result to VT.
+ impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
+
+ // Generate Q in VT.
+ impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
+ ie := itau
+ itauq := ie + m
+ itaup := itauq + m
+ iwork = itaup + m
+
+ // Zero out above L in A.
+ impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
+
+ // Bidiagonalize L in A.
+ impl.Dgebrd(m, m, a, lda, s, work[ie:],
+ work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
+
+ // Multiply right vectors bidiagonalizing L by Q in VT.
+ impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
+ a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
+ iwork = ie + m
+
+ // Perform bidiagonal QR iteration, computing right
+ // singular vectors of A in VT.
+ ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:],
+ vt, ldvt, work, 1, work, 1, work[iwork:])
+ }
+ } else if wantuo {
+ // Path 5t.
+ panic(noSVDO)
+ } else if wantuas {
+ // Path 6t.
+ if lwork >= m*m+max(4*m, bdspac) {
+ // Sufficient workspace for a fast algorithm.
+ iu := 0
+ var ldworku int
+ if lwork >= wrkbl+lda*m {
+ ldworku = lda
+ } else {
+ ldworku = m
+ }
+ itau := iu + ldworku*m
+ iwork := itau + m
+
+ // Compute A = L*Q.
+ impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+
+ // Copy L to work[iu:], zeroing out above it.
+ impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku)
+ impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku)
+
+ // Generate Q in A.
+ impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork)
+ ie := itau
+ itauq := ie + m
+ itaup := itauq + m
+ iwork = itaup + m
+
+ // Bidiagonalize L in work[iu:], copying result to U.
+ impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:],
+ work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
+ impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu)
+
+ // Generate right bidiagionalizing vectors in work[iu:].
+ impl.Dorgbr(lapack.ApplyP, m, m, m, work[iu:], ldworku,
+ work[itaup:], work[iwork:], lwork-iwork)
+
+ // Generate left bidiagonalizing vectors in U.
+ impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
+ iwork = ie + m
+
+ // Perform bidiagonal QR iteration, computing left singular
+ // vectors of L in U and computing right singular vectors of
+ // L in work[iu:].
+ ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:],
+ work[iu:], ldworku, u, ldu, work, 1, work[iwork:])
+
+ // Multiply right singular vectors of L in work[iu:] by
+ // Q in A, storing result in VT.
+ bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
+ work[iu:], ldworku, a, lda, 0, vt, ldvt)
+ } else {
+ // Insufficient workspace for a fast algorithm.
+ itau := 0
+ iwork := itau + m
+
+ // Compute A = L*Q, copying result to VT.
+ impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+ impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
+
+ // Generate Q in VT.
+ impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
+
+ // Copy L to U, zeroing out above it.
+ impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
+ impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu)
+
+ ie := itau
+ itauq := ie + m
+ itaup := itauq + m
+ iwork = itaup + m
+
+ // Bidiagonalize L in U.
+ impl.Dgebrd(m, m, u, ldu, s, work[ie:],
+ work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
+
+ // Multiply right bidiagonalizing vectors in U by Q in VT.
+ impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
+ u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
+
+ // Generate left bidiagonalizing vectors in U.
+ impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
+ iwork = ie + m
+
+ // Perform bidiagonal QR iteration, computing left singular
+ // vectors of A in U and computing right singular vectors
+ // of A in VT.
+ impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:], vt, ldvt,
+ u, ldu, work, 1, work[iwork:])
+ }
+ }
+ } else if wantva {
+ if wantun {
+ // Path 7t.
+ if lwork >= m*m+max(max(n+m, 4*m), bdspac) {
+ // Sufficient workspace for a fast algorithm.
+ ir := 0
+ var ldworkr int
+ if lwork >= wrkbl+lda*m {
+ ldworkr = lda
+ } else {
+ ldworkr = m
+ }
+ itau := ir + ldworkr*m
+ iwork := itau + m
+
+ // Compute A = L*Q, copying result to VT.
+ impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+ impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
+
+ // Copy L to work[ir:], zeroing out above it.
+ impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr)
+ impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr)
+
+ // Generate Q in VT.
+ impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
+
+ ie := itau
+ itauq := ie + m
+ itaup := itauq + m
+ iwork = itaup + m
+
+ // Bidiagonalize L in work[ir:].
+ impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:],
+ work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
+
+ // Generate right bidiagonalizing vectors in work[ir:].
+ impl.Dorgbr(lapack.ApplyP, m, m, m, work[ir:], ldworkr,
+ work[itaup:], work[iwork:], lwork-iwork)
+ iwork = ie + m
+
+ // Perform bidiagonal QR iteration, computing right
+ // singular vectors of L in work[ir:].
+ ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:],
+ work[ir:], ldworkr, work, 1, work, 1, work[iwork:])
+
+ // Multiply right singular vectors of L in work[ir:] by
+ // Q in VT, storing result in A.
+ bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
+ work[ir:], ldworkr, vt, ldvt, 0, a, lda)
+
+ // Copy right singular vectors of A from A to VT.
+ impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt)
+ } else {
+ // Insufficient workspace for a fast algorithm.
+ itau := 0
+ iwork := itau + m
+ // Compute A = L * Q, copying result to VT.
+ impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+ impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
+
+ // Generate Q in VT.
+ impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
+
+ ie := itau
+ itauq := ie + m
+ itaup := itauq + m
+ iwork = itaup + m
+
+ // Zero out above L in A.
+ impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
+
+ // Bidiagonalize L in A.
+ impl.Dgebrd(m, m, a, lda, s, work[ie:], work[itauq:],
+ work[itaup:], work[iwork:], lwork-iwork)
+
+ // Multiply right bidiagonalizing vectors in A by Q in VT.
+ impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
+ a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
+ iwork = ie + m
+
+ // Perform bidiagonal QR iteration, computing right singular
+ // vectors of A in VT.
+ ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:],
+ vt, ldvt, work, 1, work, 1, work[iwork:])
+ }
+ } else if wantuo {
+ panic(noSVDO)
+ } else if wantuas {
+ // Path 9t.
+ if lwork >= m*m+max(max(m+n, 4*m), bdspac) {
+ // Sufficient workspace for a fast algorithm.
+ iu := 0
+
+ var ldworku int
+ if lwork >= wrkbl+lda*m {
+ ldworku = lda
+ } else {
+ ldworku = m
+ }
+ itau := iu + ldworku*m
+ iwork := itau + m
+
+ // Generate A = L * Q copying result to VT.
+ impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+ impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
+
+ // Generate Q in VT.
+ impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
+
+ // Copy L to work[iu:], zeroing out above it.
+ impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku)
+ impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku)
+ ie = itau
+ itauq := ie + m
+ itaup := itauq + m
+ iwork = itaup + m
+
+ // Bidiagonalize L in work[iu:], copying result to U.
+ impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:],
+ work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
+ impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu)
+
+ // Generate right bidiagonalizing vectors in work[iu:].
+ impl.Dorgbr(lapack.ApplyP, m, m, m, work[iu:], ldworku,
+ work[itaup:], work[iwork:], lwork-iwork)
+
+ // Generate left bidiagonalizing vectors in U.
+ impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
+ iwork = ie + m
+
+ // Perform bidiagonal QR iteration, computing left singular
+ // vectors of L in U and computing right singular vectors
+ // of L in work[iu:].
+ ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:],
+ work[iu:], ldworku, u, ldu, work, 1, work[iwork:])
+
+ // Multiply right singular vectors of L in work[iu:]
+ // Q in VT, storing result in A.
+ bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
+ work[iu:], ldworku, vt, ldvt, 0, a, lda)
+
+ // Copy right singular vectors of A from A to VT.
+ impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt)
+ } else {
+ // Insufficient workspace for a fast algorithm.
+ itau := 0
+ iwork := itau + m
+
+ // Compute A = L * Q, copying result to VT.
+ impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
+ impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
+
+ // Generate Q in VT.
+ impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
+
+ // Copy L to U, zeroing out above it.
+ impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
+ impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu)
+
+ ie = itau
+ itauq := ie + m
+ itaup := itauq + m
+ iwork = itaup + m
+
+ // Bidiagonalize L in U.
+ impl.Dgebrd(m, m, u, ldu, s, work[ie:], work[itauq:],
+ work[itaup:], work[iwork:], lwork-iwork)
+
+ // Multiply right bidiagonalizing vectors in U by Q in VT.
+ impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
+ u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
+
+ // Generate left bidiagonalizing vectors in U.
+ impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
+ iwork = ie + m
+
+ // Perform bidiagonal QR iteration, computing left singular
+ // vectors of A in U and computing right singular vectors
+ // of A in VT.
+ ok = impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:],
+ vt, ldvt, u, ldu, work, 1, work[iwork:])
+ }
+ }
+ }
+ } else {
+ // Path 10t.
+ // N at least M, but not much larger.
+ ie = 0
+ itauq := ie + m
+ itaup := itauq + m
+ iwork := itaup + m
+
+ // Bidiagonalize A.
+ impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
+ if wantuas {
+ // If left singular vectors desired in U, copy result to U and
+ // generate left bidiagonalizing vectors in U.
+ impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
+ impl.Dorgbr(lapack.ApplyQ, m, m, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
+ }
+ if wantvas {
+ // If right singular vectors desired in VT, copy result to VT
+ // and generate right bidiagonalizing vectors in VT.
+ impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
+ var nrvt int
+ if wantva {
+ nrvt = n
+ } else {
+ nrvt = m
+ }
+ impl.Dorgbr(lapack.ApplyP, nrvt, n, m, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork)
+ }
+ if wantuo {
+ panic(noSVDO)
+ }
+ if wantvo {
+ panic(noSVDO)
+ }
+ iwork = ie + m
+ var nru, ncvt int
+ if wantuas || wantuo {
+ nru = m
+ }
+ if wantvas || wantvo {
+ ncvt = n
+ }
+ if !wantuo && !wantvo {
+ // Perform bidiagonal QR iteration, if desired, computing left
+ // singular vectors in U and computing right singular vectors in
+ // VT.
+ ok = impl.Dbdsqr(blas.Lower, m, ncvt, nru, 0, s, work[ie:],
+ vt, ldvt, u, ldu, work, 1, work[iwork:])
+ } else {
+ // There will be two branches when the implementation is complete.
+ panic(noSVDO)
+ }
+ }
+ }
+ if !ok {
+ if ie > 1 {
+ for i := 0; i < minmn-1; i++ {
+ work[i+1] = work[i+ie]
+ }
+ }
+ if ie < 1 {
+ for i := minmn - 2; i >= 0; i-- {
+ work[i+1] = work[i+ie]
+ }
+ }
+ }
+ // Undo scaling if necessary.
+ if iscl {
+ if anrm > bignum {
+ impl.Dlascl(lapack.General, 0, 0, bignum, anrm, minmn, 1, s, minmn)
+ }
+ if !ok && anrm > bignum {
+ impl.Dlascl(lapack.General, 0, 0, bignum, anrm, minmn-1, 1, work[minmn:], minmn)
+ }
+ if anrm < smlnum {
+ impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, minmn, 1, s, minmn)
+ }
+ if !ok && anrm < smlnum {
+ impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, minmn-1, 1, work[minmn:], minmn)
+ }
+ }
+ work[0] = float64(maxwrk)
+ return ok
+}