OSDN Git Service

new repo
[bytom/vapor.git] / vendor / gonum.org / v1 / gonum / lapack / gonum / dgesvd.go
1 // Copyright ©2015 The Gonum Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package gonum
6
7 import (
8         "math"
9
10         "gonum.org/v1/gonum/blas"
11         "gonum.org/v1/gonum/blas/blas64"
12         "gonum.org/v1/gonum/lapack"
13 )
14
15 const noSVDO = "dgesvd: not coded for overwrite"
16
17 // Dgesvd computes the singular value decomposition of the input matrix A.
18 //
19 // The singular value decomposition is
20 //  A = U * Sigma * V^T
21 // where Sigma is an m×n diagonal matrix containing the singular values of A,
22 // U is an m×m orthogonal matrix and V is an n×n orthogonal matrix. The first
23 // min(m,n) columns of U and V are the left and right singular vectors of A
24 // respectively.
25 //
26 // jobU and jobVT are options for computing the singular vectors. The behavior
27 // is as follows
28 //  jobU == lapack.SVDAll       All m columns of U are returned in u
29 //  jobU == lapack.SVDInPlace   The first min(m,n) columns are returned in u
30 //  jobU == lapack.SVDOverwrite The first min(m,n) columns of U are written into a
31 //  jobU == lapack.SVDNone      The columns of U are not computed.
32 // The behavior is the same for jobVT and the rows of V^T. At most one of jobU
33 // and jobVT can equal lapack.SVDOverwrite, and Dgesvd will panic otherwise.
34 //
35 // On entry, a contains the data for the m×n matrix A. During the call to Dgesvd
36 // the data is overwritten. On exit, A contains the appropriate singular vectors
37 // if either job is lapack.SVDOverwrite.
38 //
39 // s is a slice of length at least min(m,n) and on exit contains the singular
40 // values in decreasing order.
41 //
42 // u contains the left singular vectors on exit, stored column-wise. If
43 // jobU == lapack.SVDAll, u is of size m×m. If jobU == lapack.SVDInPlace u is
44 // of size m×min(m,n). If jobU == lapack.SVDOverwrite or lapack.SVDNone, u is
45 // not used.
46 //
47 // vt contains the left singular vectors on exit, stored row-wise. If
48 // jobV == lapack.SVDAll, vt is of size n×m. If jobVT == lapack.SVDInPlace vt is
49 // of size min(m,n)×n. If jobVT == lapack.SVDOverwrite or lapack.SVDNone, vt is
50 // not used.
51 //
52 // work is a slice for storing temporary memory, and lwork is the usable size of
53 // the slice. lwork must be at least max(5*min(m,n), 3*min(m,n)+max(m,n)).
54 // If lwork == -1, instead of performing Dgesvd, the optimal work length will be
55 // stored into work[0]. Dgesvd will panic if the working memory has insufficient
56 // storage.
57 //
58 // Dgesvd returns whether the decomposition successfully completed.
59 func (impl Implementation) Dgesvd(jobU, jobVT lapack.SVDJob, m, n int, a []float64, lda int, s, u []float64, ldu int, vt []float64, ldvt int, work []float64, lwork int) (ok bool) {
60         minmn := min(m, n)
61         checkMatrix(m, n, a, lda)
62         if jobU == lapack.SVDAll {
63                 checkMatrix(m, m, u, ldu)
64         } else if jobU == lapack.SVDInPlace {
65                 checkMatrix(m, minmn, u, ldu)
66         }
67         if jobVT == lapack.SVDAll {
68                 checkMatrix(n, n, vt, ldvt)
69         } else if jobVT == lapack.SVDInPlace {
70                 checkMatrix(minmn, n, vt, ldvt)
71         }
72         if jobU == lapack.SVDOverwrite && jobVT == lapack.SVDOverwrite {
73                 panic("lapack: both jobU and jobVT are lapack.SVDOverwrite")
74         }
75         if len(s) < minmn {
76                 panic(badS)
77         }
78         if jobU == lapack.SVDOverwrite || jobVT == lapack.SVDOverwrite {
79                 panic(noSVDO)
80         }
81         if m == 0 || n == 0 {
82                 return true
83         }
84
85         wantua := jobU == lapack.SVDAll
86         wantus := jobU == lapack.SVDInPlace
87         wantuas := wantua || wantus
88         wantuo := jobU == lapack.SVDOverwrite
89         wantun := jobU == lapack.None
90
91         wantva := jobVT == lapack.SVDAll
92         wantvs := jobVT == lapack.SVDInPlace
93         wantvas := wantva || wantvs
94         wantvo := jobVT == lapack.SVDOverwrite
95         wantvn := jobVT == lapack.None
96
97         bi := blas64.Implementation()
98         var mnthr int
99
100         // Compute optimal space for subroutines.
101         maxwrk := 1
102         opts := string(jobU) + string(jobVT)
103         var wrkbl, bdspac int
104         if m >= n {
105                 mnthr = impl.Ilaenv(6, "DGESVD", opts, m, n, 0, 0)
106                 bdspac = 5 * n
107                 impl.Dgeqrf(m, n, a, lda, nil, work, -1)
108                 lwork_dgeqrf := int(work[0])
109                 impl.Dorgqr(m, n, n, a, lda, nil, work, -1)
110                 lwork_dorgqr_n := int(work[0])
111                 impl.Dorgqr(m, m, n, a, lda, nil, work, -1)
112                 lwork_dorgqr_m := int(work[0])
113                 impl.Dgebrd(n, n, a, lda, s, nil, nil, nil, work, -1)
114                 lwork_dgebrd := int(work[0])
115                 impl.Dorgbr(lapack.ApplyP, n, n, n, a, lda, nil, work, -1)
116                 lwork_dorgbr_p := int(work[0])
117                 impl.Dorgbr(lapack.ApplyQ, n, n, n, a, lda, nil, work, -1)
118                 lwork_dorgbr_q := int(work[0])
119
120                 if m >= mnthr {
121                         // m >> n
122                         if wantun {
123                                 // Path 1
124                                 maxwrk = n + lwork_dgeqrf
125                                 maxwrk = max(maxwrk, 3*n+lwork_dgebrd)
126                                 if wantvo || wantvas {
127                                         maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p)
128                                 }
129                                 maxwrk = max(maxwrk, bdspac)
130                         } else if wantuo && wantvn {
131                                 // Path 2
132                                 wrkbl = n + lwork_dgeqrf
133                                 wrkbl = max(wrkbl, n+lwork_dorgqr_n)
134                                 wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
135                                 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
136                                 wrkbl = max(wrkbl, bdspac)
137                                 maxwrk = max(n*n+wrkbl, n*n+m*n+n)
138                         } else if wantuo && wantvs {
139                                 // Path 3
140                                 wrkbl = n + lwork_dgeqrf
141                                 wrkbl = max(wrkbl, n+lwork_dorgqr_n)
142                                 wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
143                                 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
144                                 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
145                                 wrkbl = max(wrkbl, bdspac)
146                                 maxwrk = max(n*n+wrkbl, n*n+m*n+n)
147                         } else if wantus && wantvn {
148                                 // Path 4
149                                 wrkbl = n + lwork_dgeqrf
150                                 wrkbl = max(wrkbl, n+lwork_dorgqr_n)
151                                 wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
152                                 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
153                                 wrkbl = max(wrkbl, bdspac)
154                                 maxwrk = n*n + wrkbl
155                         } else if wantus && wantvo {
156                                 // Path 5
157                                 wrkbl = n + lwork_dgeqrf
158                                 wrkbl = max(wrkbl, n+lwork_dorgqr_n)
159                                 wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
160                                 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
161                                 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
162                                 wrkbl = max(wrkbl, bdspac)
163                                 maxwrk = 2*n*n + wrkbl
164                         } else if wantus && wantvas {
165                                 // Path 6
166                                 wrkbl = n + lwork_dgeqrf
167                                 wrkbl = max(wrkbl, n+lwork_dorgqr_n)
168                                 wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
169                                 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
170                                 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
171                                 wrkbl = max(wrkbl, bdspac)
172                                 maxwrk = n*n + wrkbl
173                         } else if wantua && wantvn {
174                                 // Path 7
175                                 wrkbl = n + lwork_dgeqrf
176                                 wrkbl = max(wrkbl, n+lwork_dorgqr_m)
177                                 wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
178                                 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
179                                 wrkbl = max(wrkbl, bdspac)
180                                 maxwrk = n*n + wrkbl
181                         } else if wantua && wantvo {
182                                 // Path 8
183                                 wrkbl = n + lwork_dgeqrf
184                                 wrkbl = max(wrkbl, n+lwork_dorgqr_m)
185                                 wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
186                                 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
187                                 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
188                                 wrkbl = max(wrkbl, bdspac)
189                                 maxwrk = 2*n*n + wrkbl
190                         } else if wantua && wantvas {
191                                 // Path 9
192                                 wrkbl = n + lwork_dgeqrf
193                                 wrkbl = max(wrkbl, n+lwork_dorgqr_m)
194                                 wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
195                                 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
196                                 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
197                                 wrkbl = max(wrkbl, bdspac)
198                                 maxwrk = n*n + wrkbl
199                         }
200                 } else {
201                         // Path 10: m > n
202                         impl.Dgebrd(m, n, a, lda, s, nil, nil, nil, work, -1)
203                         lwork_dgebrd := int(work[0])
204                         maxwrk = 3*n + lwork_dgebrd
205                         if wantus || wantuo {
206                                 impl.Dorgbr(lapack.ApplyQ, m, n, n, a, lda, nil, work, -1)
207                                 lwork_dorgbr_q = int(work[0])
208                                 maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q)
209                         }
210                         if wantua {
211                                 impl.Dorgbr(lapack.ApplyQ, m, m, n, a, lda, nil, work, -1)
212                                 lwork_dorgbr_q := int(work[0])
213                                 maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q)
214                         }
215                         if !wantvn {
216                                 maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p)
217                         }
218                         maxwrk = max(maxwrk, bdspac)
219                 }
220         } else {
221                 mnthr = impl.Ilaenv(6, "DGESVD", opts, m, n, 0, 0)
222
223                 bdspac = 5 * m
224                 impl.Dgelqf(m, n, a, lda, nil, work, -1)
225                 lwork_dgelqf := int(work[0])
226                 impl.Dorglq(n, n, m, nil, n, nil, work, -1)
227                 lwork_dorglq_n := int(work[0])
228                 impl.Dorglq(m, n, m, a, lda, nil, work, -1)
229                 lwork_dorglq_m := int(work[0])
230                 impl.Dgebrd(m, m, a, lda, s, nil, nil, nil, work, -1)
231                 lwork_dgebrd := int(work[0])
232                 impl.Dorgbr(lapack.ApplyP, m, m, m, a, n, nil, work, -1)
233                 lwork_dorgbr_p := int(work[0])
234                 impl.Dorgbr(lapack.ApplyQ, m, m, m, a, n, nil, work, -1)
235                 lwork_dorgbr_q := int(work[0])
236                 if n >= mnthr {
237                         // n >> m
238                         if wantvn {
239                                 // Path 1t
240                                 maxwrk = m + lwork_dgelqf
241                                 maxwrk = max(maxwrk, 3*m+lwork_dgebrd)
242                                 if wantuo || wantuas {
243                                         maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q)
244                                 }
245                                 maxwrk = max(maxwrk, bdspac)
246                         } else if wantvo && wantun {
247                                 // Path 2t
248                                 wrkbl = m + lwork_dgelqf
249                                 wrkbl = max(wrkbl, m+lwork_dorglq_m)
250                                 wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
251                                 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
252                                 wrkbl = max(wrkbl, bdspac)
253                                 maxwrk = max(m*m+wrkbl, m*m+m*n+m)
254                         } else if wantvo && wantuas {
255                                 // Path 3t
256                                 wrkbl = m + lwork_dgelqf
257                                 wrkbl = max(wrkbl, m+lwork_dorglq_m)
258                                 wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
259                                 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
260                                 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
261                                 wrkbl = max(wrkbl, bdspac)
262                                 maxwrk = max(m*m+wrkbl, m*m+m*n+m)
263                         } else if wantvs && wantun {
264                                 // Path 4t
265                                 wrkbl = m + lwork_dgelqf
266                                 wrkbl = max(wrkbl, m+lwork_dorglq_m)
267                                 wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
268                                 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
269                                 wrkbl = max(wrkbl, bdspac)
270                                 maxwrk = m*m + wrkbl
271                         } else if wantvs && wantuo {
272                                 // Path 5t
273                                 wrkbl = m + lwork_dgelqf
274                                 wrkbl = max(wrkbl, m+lwork_dorglq_m)
275                                 wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
276                                 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
277                                 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
278                                 wrkbl = max(wrkbl, bdspac)
279                                 maxwrk = 2*m*m + wrkbl
280                         } else if wantvs && wantuas {
281                                 // Path 6t
282                                 wrkbl = m + lwork_dgelqf
283                                 wrkbl = max(wrkbl, m+lwork_dorglq_m)
284                                 wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
285                                 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
286                                 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
287                                 wrkbl = max(wrkbl, bdspac)
288                                 maxwrk = m*m + wrkbl
289                         } else if wantva && wantun {
290                                 // Path 7t
291                                 wrkbl = m + lwork_dgelqf
292                                 wrkbl = max(wrkbl, m+lwork_dorglq_n)
293                                 wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
294                                 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
295                                 wrkbl = max(wrkbl, bdspac)
296                                 maxwrk = m*m + wrkbl
297                         } else if wantva && wantuo {
298                                 // Path 8t
299                                 wrkbl = m + lwork_dgelqf
300                                 wrkbl = max(wrkbl, m+lwork_dorglq_n)
301                                 wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
302                                 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
303                                 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
304                                 wrkbl = max(wrkbl, bdspac)
305                                 maxwrk = 2*m*m + wrkbl
306                         } else if wantva && wantuas {
307                                 // Path 9t
308                                 wrkbl = m + lwork_dgelqf
309                                 wrkbl = max(wrkbl, m+lwork_dorglq_n)
310                                 wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
311                                 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
312                                 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
313                                 wrkbl = max(wrkbl, bdspac)
314                                 maxwrk = m*m + wrkbl
315                         }
316                 } else {
317                         // Path 10t, n > m
318                         impl.Dgebrd(m, n, a, lda, s, nil, nil, nil, work, -1)
319                         lwork_dgebrd = int(work[0])
320                         maxwrk = 3*m + lwork_dgebrd
321                         if wantvs || wantvo {
322                                 impl.Dorgbr(lapack.ApplyP, m, n, m, a, n, nil, work, -1)
323                                 lwork_dorgbr_p = int(work[0])
324                                 maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p)
325                         }
326                         if wantva {
327                                 impl.Dorgbr(lapack.ApplyP, n, n, m, a, n, nil, work, -1)
328                                 lwork_dorgbr_p = int(work[0])
329                                 maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p)
330                         }
331                         if !wantun {
332                                 maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q)
333                         }
334                         maxwrk = max(maxwrk, bdspac)
335                 }
336         }
337
338         minWork := max(1, 5*minmn)
339         if !((wantun && m >= mnthr) || (wantvn && n >= mnthr)) {
340                 minWork = max(minWork, 3*minmn+max(m, n))
341         }
342
343         if lwork != -1 {
344                 if len(work) < lwork {
345                         panic(badWork)
346                 }
347                 if lwork < minWork {
348                         panic(badWork)
349                 }
350         }
351         if m == 0 || n == 0 {
352                 return true
353         }
354
355         maxwrk = max(maxwrk, minWork)
356         work[0] = float64(maxwrk)
357         if lwork == -1 {
358                 return true
359         }
360
361         // Perform decomposition.
362         eps := dlamchE
363         smlnum := math.Sqrt(dlamchS) / eps
364         bignum := 1 / smlnum
365
366         // Scale A if max element outside range [smlnum, bignum].
367         anrm := impl.Dlange(lapack.MaxAbs, m, n, a, lda, nil)
368         var iscl bool
369         if anrm > 0 && anrm < smlnum {
370                 iscl = true
371                 impl.Dlascl(lapack.General, 0, 0, anrm, smlnum, m, n, a, lda)
372         } else if anrm > bignum {
373                 iscl = true
374                 impl.Dlascl(lapack.General, 0, 0, anrm, bignum, m, n, a, lda)
375         }
376
377         var ie int
378         if m >= n {
379                 // If A has sufficiently more rows than columns, use the QR decomposition.
380                 if m >= mnthr {
381                         // m >> n
382                         if wantun {
383                                 // Path 1.
384                                 itau := 0
385                                 iwork := itau + n
386
387                                 // Compute A = Q * R.
388                                 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
389
390                                 // Zero out below R.
391                                 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
392                                 ie = 0
393                                 itauq := ie + n
394                                 itaup := itauq + n
395                                 iwork = itaup + n
396                                 // Bidiagonalize R in A.
397                                 impl.Dgebrd(n, n, a, lda, s, work[ie:], work[itauq:],
398                                         work[itaup:], work[iwork:], lwork-iwork)
399                                 ncvt := 0
400                                 if wantvo || wantvas {
401                                         // Generate P^T.
402                                         impl.Dorgbr(lapack.ApplyP, n, n, n, a, lda, work[itaup:],
403                                                 work[iwork:], lwork-iwork)
404                                         ncvt = n
405                                 }
406                                 iwork = ie + n
407
408                                 // Perform bidiagonal QR iteration computing right singular vectors
409                                 // of A in A if desired.
410                                 ok = impl.Dbdsqr(blas.Upper, n, ncvt, 0, 0, s, work[ie:],
411                                         a, lda, work, 1, work, 1, work[iwork:])
412
413                                 // If right singular vectors desired in VT, copy them there.
414                                 if wantvas {
415                                         impl.Dlacpy(blas.All, n, n, a, lda, vt, ldvt)
416                                 }
417                         } else if wantuo && wantvn {
418                                 // Path 2
419                                 panic(noSVDO)
420                         } else if wantuo && wantvas {
421                                 // Path 3
422                                 panic(noSVDO)
423                         } else if wantus {
424                                 if wantvn {
425                                         // Path 4
426                                         if lwork >= n*n+max(4*n, bdspac) {
427                                                 // Sufficient workspace for a fast algorithm.
428                                                 ir := 0
429                                                 var ldworkr int
430                                                 if lwork >= wrkbl+lda*n {
431                                                         ldworkr = lda
432                                                 } else {
433                                                         ldworkr = n
434                                                 }
435                                                 itau := ir + ldworkr*n
436                                                 iwork := itau + n
437                                                 // Compute A = Q * R.
438                                                 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
439
440                                                 // Copy R to work[ir:], zeroing out below it.
441                                                 impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr)
442                                                 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr)
443
444                                                 // Generate Q in A.
445                                                 impl.Dorgqr(m, n, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
446                                                 ie := itau
447                                                 itauq := ie + n
448                                                 itaup := itauq + n
449                                                 iwork = itaup + n
450
451                                                 // Bidiagonalize R in work[ir:].
452                                                 impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:],
453                                                         work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
454
455                                                 // Generate left vectors bidiagonalizing R in work[ir:].
456                                                 impl.Dorgbr(lapack.ApplyQ, n, n, n, work[ir:], ldworkr,
457                                                         work[itauq:], work[iwork:], lwork-iwork)
458                                                 iwork = ie + n
459
460                                                 // Perform bidiagonal QR iteration, compuing left singular
461                                                 // vectors of R in work[ir:].
462                                                 ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], work, 1,
463                                                         work[ir:], ldworkr, work, 1, work[iwork:])
464
465                                                 // Multiply Q in A by left singular vectors of R in
466                                                 // work[ir:], storing result in U.
467                                                 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda,
468                                                         work[ir:], ldworkr, 0, u, ldu)
469                                         } else {
470                                                 // Insufficient workspace for a fast algorithm.
471                                                 itau := 0
472                                                 iwork := itau + n
473
474                                                 // Compute A = Q*R, copying result to U.
475                                                 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
476                                                 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
477
478                                                 // Generate Q in U.
479                                                 impl.Dorgqr(m, n, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
480                                                 ie := itau
481                                                 itauq := ie + n
482                                                 itaup := itauq + n
483                                                 iwork = itaup + n
484
485                                                 // Zero out below R in A.
486                                                 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
487
488                                                 // Bidiagonalize R in A.
489                                                 impl.Dgebrd(n, n, a, lda, s, work[ie:],
490                                                         work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
491
492                                                 // Multiply Q in U by left vectors bidiagonalizing R.
493                                                 impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
494                                                         a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
495                                                 iwork = ie + n
496
497                                                 // Perform bidiagonal QR iteration, computing left
498                                                 // singular vectors of A in U.
499                                                 ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:], work, 1,
500                                                         u, ldu, work, 1, work[iwork:])
501                                         }
502                                 } else if wantvo {
503                                         // Path 5
504                                         panic(noSVDO)
505                                 } else if wantvas {
506                                         // Path 6
507                                         if lwork >= n*n+max(4*n, bdspac) {
508                                                 // Sufficient workspace for a fast algorithm.
509                                                 iu := 0
510                                                 var ldworku int
511                                                 if lwork >= wrkbl+lda*n {
512                                                         ldworku = lda
513                                                 } else {
514                                                         ldworku = n
515                                                 }
516                                                 itau := iu + ldworku*n
517                                                 iwork := itau + n
518
519                                                 // Compute A = Q * R.
520                                                 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
521                                                 // Copy R to work[iu:], zeroing out below it.
522                                                 impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku)
523                                                 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku)
524
525                                                 // Generate Q in A.
526                                                 impl.Dorgqr(m, n, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
527
528                                                 ie := itau
529                                                 itauq := ie + n
530                                                 itaup := itauq + n
531                                                 iwork = itaup + n
532
533                                                 // Bidiagonalize R in work[iu:], copying result to VT.
534                                                 impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:],
535                                                         work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
536                                                 impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt)
537
538                                                 // Generate left bidiagonalizing vectors in work[iu:].
539                                                 impl.Dorgbr(lapack.ApplyQ, n, n, n, work[iu:], ldworku,
540                                                         work[itauq:], work[iwork:], lwork-iwork)
541
542                                                 // Generate right bidiagonalizing vectors in VT.
543                                                 impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
544                                                         work[itaup:], work[iwork:], lwork-iwork)
545                                                 iwork = ie + n
546
547                                                 // Perform bidiagonal QR iteration, computing left singular
548                                                 // vectors of R in work[iu:], and computing right singular
549                                                 // vectors of R in VT.
550                                                 ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:],
551                                                         vt, ldvt, work[iu:], ldworku, work, 1, work[iwork:])
552
553                                                 // Multiply Q in A by left singular vectors of R in
554                                                 // work[iu:], storing result in U.
555                                                 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda,
556                                                         work[iu:], ldworku, 0, u, ldu)
557                                         } else {
558                                                 // Insufficient workspace for a fast algorithm.
559                                                 itau := 0
560                                                 iwork := itau + n
561
562                                                 // Compute A = Q * R, copying result to U.
563                                                 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
564                                                 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
565
566                                                 // Generate Q in U.
567                                                 impl.Dorgqr(m, n, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
568
569                                                 // Copy R to VT, zeroing out below it.
570                                                 impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
571                                                 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt)
572
573                                                 ie := itau
574                                                 itauq := ie + n
575                                                 itaup := itauq + n
576                                                 iwork = itaup + n
577
578                                                 // Bidiagonalize R in VT.
579                                                 impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
580                                                         work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
581
582                                                 // Multiply Q in U by left bidiagonalizing vectors in VT.
583                                                 impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
584                                                         vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
585
586                                                 // Generate right bidiagonalizing vectors in VT.
587                                                 impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
588                                                         work[itaup:], work[iwork:], lwork-iwork)
589                                                 iwork = ie + n
590
591                                                 // Perform bidiagonal QR iteration, computing left singular
592                                                 // vectors of A in U and computing right singular vectors
593                                                 // of A in VT.
594                                                 ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
595                                                         vt, ldvt, u, ldu, work, 1, work[iwork:])
596                                         }
597                                 }
598                         } else if wantua {
599                                 if wantvn {
600                                         // Path 7
601                                         if lwork >= n*n+max(max(n+m, 4*n), bdspac) {
602                                                 // Sufficient workspace for a fast algorithm.
603                                                 ir := 0
604                                                 var ldworkr int
605                                                 if lwork >= wrkbl+lda*n {
606                                                         ldworkr = lda
607                                                 } else {
608                                                         ldworkr = n
609                                                 }
610                                                 itau := ir + ldworkr*n
611                                                 iwork := itau + n
612
613                                                 // Compute A = Q*R, copying result to U.
614                                                 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
615                                                 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
616
617                                                 // Copy R to work[ir:], zeroing out below it.
618                                                 impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr)
619                                                 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr)
620
621                                                 // Generate Q in U.
622                                                 impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
623                                                 ie := itau
624                                                 itauq := ie + n
625                                                 itaup := itauq + n
626                                                 iwork = itaup + n
627
628                                                 // Bidiagonalize R in work[ir:].
629                                                 impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:],
630                                                         work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
631
632                                                 // Generate left bidiagonalizing vectors in work[ir:].
633                                                 impl.Dorgbr(lapack.ApplyQ, n, n, n, work[ir:], ldworkr,
634                                                         work[itauq:], work[iwork:], lwork-iwork)
635                                                 iwork = ie + n
636
637                                                 // Perform bidiagonal QR iteration, computing left singular
638                                                 // vectors of R in work[ir:].
639                                                 ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], work, 1,
640                                                         work[ir:], ldworkr, work, 1, work[iwork:])
641
642                                                 // Multiply Q in U by left singular vectors of R in
643                                                 // work[ir:], storing result in A.
644                                                 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, u, ldu,
645                                                         work[ir:], ldworkr, 0, a, lda)
646
647                                                 // Copy left singular vectors of A from A to U.
648                                                 impl.Dlacpy(blas.All, m, n, a, lda, u, ldu)
649                                         } else {
650                                                 // Insufficient workspace for a fast algorithm.
651                                                 itau := 0
652                                                 iwork := itau + n
653
654                                                 // Compute A = Q*R, copying result to U.
655                                                 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
656                                                 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
657
658                                                 // Generate Q in U.
659                                                 impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
660                                                 ie := itau
661                                                 itauq := ie + n
662                                                 itaup := itauq + n
663                                                 iwork = itaup + n
664
665                                                 // Zero out below R in A.
666                                                 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
667
668                                                 // Bidiagonalize R in A.
669                                                 impl.Dgebrd(n, n, a, lda, s, work[ie:],
670                                                         work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
671
672                                                 // Multiply Q in U by left bidiagonalizing vectors in A.
673                                                 impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
674                                                         a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
675                                                 iwork = ie + n
676
677                                                 // Perform bidiagonal QR iteration, computing left
678                                                 // singular vectors of A in U.
679                                                 ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:],
680                                                         work, 1, u, ldu, work, 1, work[iwork:])
681                                         }
682                                 } else if wantvo {
683                                         // Path 8.
684                                         panic(noSVDO)
685                                 } else if wantvas {
686                                         // Path 9.
687                                         if lwork >= n*n+max(max(n+m, 4*n), bdspac) {
688                                                 // Sufficient workspace for a fast algorithm.
689                                                 iu := 0
690                                                 var ldworku int
691                                                 if lwork >= wrkbl+lda*n {
692                                                         ldworku = lda
693                                                 } else {
694                                                         ldworku = n
695                                                 }
696                                                 itau := iu + ldworku*n
697                                                 iwork := itau + n
698
699                                                 // Compute A = Q * R, copying result to U.
700                                                 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
701                                                 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
702
703                                                 // Generate Q in U.
704                                                 impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
705
706                                                 // Copy R to work[iu:], zeroing out below it.
707                                                 impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku)
708                                                 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku)
709
710                                                 ie = itau
711                                                 itauq := ie + n
712                                                 itaup := itauq + n
713                                                 iwork = itaup + n
714
715                                                 // Bidiagonalize R in work[iu:], copying result to VT.
716                                                 impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:],
717                                                         work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
718                                                 impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt)
719
720                                                 // Generate left bidiagonalizing vectors in work[iu:].
721                                                 impl.Dorgbr(lapack.ApplyQ, n, n, n, work[iu:], ldworku,
722                                                         work[itauq:], work[iwork:], lwork-iwork)
723
724                                                 // Generate right bidiagonalizing vectors in VT.
725                                                 impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
726                                                         work[itaup:], work[iwork:], lwork-iwork)
727                                                 iwork = ie + n
728
729                                                 // Perform bidiagonal QR iteration, computing left singular
730                                                 // vectors of R in work[iu:] and computing right
731                                                 // singular vectors of R in VT.
732                                                 ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:],
733                                                         vt, ldvt, work[iu:], ldworku, work, 1, work[iwork:])
734
735                                                 // Multiply Q in U by left singular vectors of R in
736                                                 // work[iu:], storing result in A.
737                                                 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1,
738                                                         u, ldu, work[iu:], ldworku, 0, a, lda)
739
740                                                 // Copy left singular vectors of A from A to U.
741                                                 impl.Dlacpy(blas.All, m, n, a, lda, u, ldu)
742
743                                                 /*
744                                                         // Bidiagonalize R in VT.
745                                                         impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
746                                                                 work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
747
748                                                         // Multiply Q in U by left bidiagonalizing vectors in VT.
749                                                         impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans,
750                                                                 m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
751
752                                                         // Generate right bidiagonalizing vectors in VT.
753                                                         impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
754                                                                 work[itaup:], work[iwork:], lwork-iwork)
755                                                         iwork = ie + n
756
757                                                         // Perform bidiagonal QR iteration, computing left singular
758                                                         // vectors of A in U and computing right singular vectors
759                                                         // of A in VT.
760                                                         ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
761                                                                 vt, ldvt, u, ldu, work, 1, work[iwork:])
762                                                 */
763                                         } else {
764                                                 // Insufficient workspace for a fast algorithm.
765                                                 itau := 0
766                                                 iwork := itau + n
767
768                                                 // Compute A = Q*R, copying result to U.
769                                                 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
770                                                 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
771
772                                                 // Generate Q in U.
773                                                 impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
774
775                                                 // Copy R from A to VT, zeroing out below it.
776                                                 impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
777                                                 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt)
778
779                                                 ie := itau
780                                                 itauq := ie + n
781                                                 itaup := itauq + n
782                                                 iwork = itaup + n
783
784                                                 // Bidiagonalize R in VT.
785                                                 impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
786                                                         work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
787
788                                                 // Multiply Q in U by left bidiagonalizing vectors in VT.
789                                                 impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans,
790                                                         m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
791
792                                                 // Generate right bidiagonizing vectors in VT.
793                                                 impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
794                                                         work[itaup:], work[iwork:], lwork-iwork)
795                                                 iwork = ie + n
796
797                                                 // Perform bidiagonal QR iteration, computing left singular
798                                                 // vectors of A in U and computing right singular vectors
799                                                 // of A in VT.
800                                                 impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
801                                                         vt, ldvt, u, ldu, work, 1, work[iwork:])
802                                         }
803                                 }
804                         }
805                 } else {
806                         // Path 10.
807                         // M at least N, but not much larger.
808                         ie = 0
809                         itauq := ie + n
810                         itaup := itauq + n
811                         iwork := itaup + n
812
813                         // Bidiagonalize A.
814                         impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:],
815                                 work[itaup:], work[iwork:], lwork-iwork)
816                         if wantuas {
817                                 // Left singular vectors are desired in U. Copy result to U and
818                                 // generate left biadiagonalizing vectors in U.
819                                 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
820                                 var ncu int
821                                 if wantus {
822                                         ncu = n
823                                 }
824                                 if wantua {
825                                         ncu = m
826                                 }
827                                 impl.Dorgbr(lapack.ApplyQ, m, ncu, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
828                         }
829                         if wantvas {
830                                 // Right singular vectors are desired in VT. Copy result to VT and
831                                 // generate left biadiagonalizing vectors in VT.
832                                 impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
833                                 impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork)
834                         }
835                         if wantuo {
836                                 panic(noSVDO)
837                         }
838                         if wantvo {
839                                 panic(noSVDO)
840                         }
841                         iwork = ie + n
842                         var nru, ncvt int
843                         if wantuas || wantuo {
844                                 nru = m
845                         }
846                         if wantun {
847                                 nru = 0
848                         }
849                         if wantvas || wantvo {
850                                 ncvt = n
851                         }
852                         if wantvn {
853                                 ncvt = 0
854                         }
855                         if !wantuo && !wantvo {
856                                 // Perform bidiagonal QR iteration, if desired, computing left
857                                 // singular vectors in U and right singular vectors in VT.
858                                 ok = impl.Dbdsqr(blas.Upper, n, ncvt, nru, 0, s, work[ie:],
859                                         vt, ldvt, u, ldu, work, 1, work[iwork:])
860                         } else {
861                                 // There will be two branches when the implementation is complete.
862                                 panic(noSVDO)
863                         }
864                 }
865         } else {
866                 // A has more columns than rows. If A has sufficiently more columns than
867                 // rows, first reduce using the LQ decomposition.
868                 if n >= mnthr {
869                         // n >> m.
870                         if wantvn {
871                                 // Path 1t.
872                                 itau := 0
873                                 iwork := itau + m
874
875                                 // Compute A = L*Q.
876                                 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
877
878                                 // Zero out above L.
879                                 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
880                                 ie := 0
881                                 itauq := ie + m
882                                 itaup := itauq + m
883                                 iwork = itaup + m
884
885                                 // Bidiagonalize L in A.
886                                 impl.Dgebrd(m, m, a, lda, s, work[ie:itauq],
887                                         work[itauq:itaup], work[itaup:iwork], work[iwork:], lwork-iwork)
888                                 if wantuo || wantuas {
889                                         impl.Dorgbr(lapack.ApplyQ, m, m, m, a, lda,
890                                                 work[itauq:], work[iwork:], lwork-iwork)
891                                 }
892                                 iwork = ie + m
893                                 nru := 0
894                                 if wantuo || wantuas {
895                                         nru = m
896                                 }
897
898                                 // Perform bidiagonal QR iteration, computing left singular vectors
899                                 // of A in A if desired.
900                                 ok = impl.Dbdsqr(blas.Upper, m, 0, nru, 0, s, work[ie:],
901                                         work, 1, a, lda, work, 1, work[iwork:])
902
903                                 // If left singular vectors desired in U, copy them there.
904                                 if wantuas {
905                                         impl.Dlacpy(blas.All, m, m, a, lda, u, ldu)
906                                 }
907                         } else if wantvo && wantun {
908                                 // Path 2t.
909                                 panic(noSVDO)
910                         } else if wantvo && wantuas {
911                                 // Path 3t.
912                                 panic(noSVDO)
913                         } else if wantvs {
914                                 if wantun {
915                                         // Path 4t.
916                                         if lwork >= m*m+max(4*m, bdspac) {
917                                                 // Sufficient workspace for a fast algorithm.
918                                                 ir := 0
919                                                 var ldworkr int
920                                                 if lwork >= wrkbl+lda*m {
921                                                         ldworkr = lda
922                                                 } else {
923                                                         ldworkr = m
924                                                 }
925                                                 itau := ir + ldworkr*m
926                                                 iwork := itau + m
927
928                                                 // Compute A = L*Q.
929                                                 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
930
931                                                 // Copy L to work[ir:], zeroing out above it.
932                                                 impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr)
933                                                 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr)
934
935                                                 // Generate Q in A.
936                                                 impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork)
937                                                 ie := itau
938                                                 itauq := ie + m
939                                                 itaup := itauq + m
940                                                 iwork = itaup + m
941
942                                                 // Bidiagonalize L in work[ir:].
943                                                 impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:],
944                                                         work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
945
946                                                 // Generate right vectors bidiagonalizing L in work[ir:].
947                                                 impl.Dorgbr(lapack.ApplyP, m, m, m, work[ir:], ldworkr,
948                                                         work[itaup:], work[iwork:], lwork-iwork)
949                                                 iwork = ie + m
950
951                                                 // Perform bidiagonal QR iteration, computing right singular
952                                                 // vectors of L in work[ir:].
953                                                 ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:],
954                                                         work[ir:], ldworkr, work, 1, work, 1, work[iwork:])
955
956                                                 // Multiply right singular vectors of L in work[ir:] by
957                                                 // Q in A, storing result in VT.
958                                                 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
959                                                         work[ir:], ldworkr, a, lda, 0, vt, ldvt)
960                                         } else {
961                                                 // Insufficient workspace for a fast algorithm.
962                                                 itau := 0
963                                                 iwork := itau + m
964
965                                                 // Compute A = L*Q.
966                                                 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
967
968                                                 // Copy result to VT.
969                                                 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
970
971                                                 // Generate Q in VT.
972                                                 impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
973                                                 ie := itau
974                                                 itauq := ie + m
975                                                 itaup := itauq + m
976                                                 iwork = itaup + m
977
978                                                 // Zero out above L in A.
979                                                 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
980
981                                                 // Bidiagonalize L in A.
982                                                 impl.Dgebrd(m, m, a, lda, s, work[ie:],
983                                                         work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
984
985                                                 // Multiply right vectors bidiagonalizing L by Q in VT.
986                                                 impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
987                                                         a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
988                                                 iwork = ie + m
989
990                                                 // Perform bidiagonal QR iteration, computing right
991                                                 // singular vectors of A in VT.
992                                                 ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:],
993                                                         vt, ldvt, work, 1, work, 1, work[iwork:])
994                                         }
995                                 } else if wantuo {
996                                         // Path 5t.
997                                         panic(noSVDO)
998                                 } else if wantuas {
999                                         // Path 6t.
1000                                         if lwork >= m*m+max(4*m, bdspac) {
1001                                                 // Sufficient workspace for a fast algorithm.
1002                                                 iu := 0
1003                                                 var ldworku int
1004                                                 if lwork >= wrkbl+lda*m {
1005                                                         ldworku = lda
1006                                                 } else {
1007                                                         ldworku = m
1008                                                 }
1009                                                 itau := iu + ldworku*m
1010                                                 iwork := itau + m
1011
1012                                                 // Compute A = L*Q.
1013                                                 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
1014
1015                                                 // Copy L to work[iu:], zeroing out above it.
1016                                                 impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku)
1017                                                 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku)
1018
1019                                                 // Generate Q in A.
1020                                                 impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork)
1021                                                 ie := itau
1022                                                 itauq := ie + m
1023                                                 itaup := itauq + m
1024                                                 iwork = itaup + m
1025
1026                                                 // Bidiagonalize L in work[iu:], copying result to U.
1027                                                 impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:],
1028                                                         work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
1029                                                 impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu)
1030
1031                                                 // Generate right bidiagionalizing vectors in work[iu:].
1032                                                 impl.Dorgbr(lapack.ApplyP, m, m, m, work[iu:], ldworku,
1033                                                         work[itaup:], work[iwork:], lwork-iwork)
1034
1035                                                 // Generate left bidiagonalizing vectors in U.
1036                                                 impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
1037                                                 iwork = ie + m
1038
1039                                                 // Perform bidiagonal QR iteration, computing left singular
1040                                                 // vectors of L in U and computing right singular vectors of
1041                                                 // L in work[iu:].
1042                                                 ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:],
1043                                                         work[iu:], ldworku, u, ldu, work, 1, work[iwork:])
1044
1045                                                 // Multiply right singular vectors of L in work[iu:] by
1046                                                 // Q in A, storing result in VT.
1047                                                 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
1048                                                         work[iu:], ldworku, a, lda, 0, vt, ldvt)
1049                                         } else {
1050                                                 // Insufficient workspace for a fast algorithm.
1051                                                 itau := 0
1052                                                 iwork := itau + m
1053
1054                                                 // Compute A = L*Q, copying result to VT.
1055                                                 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
1056                                                 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
1057
1058                                                 // Generate Q in VT.
1059                                                 impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
1060
1061                                                 // Copy L to U, zeroing out above it.
1062                                                 impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
1063                                                 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu)
1064
1065                                                 ie := itau
1066                                                 itauq := ie + m
1067                                                 itaup := itauq + m
1068                                                 iwork = itaup + m
1069
1070                                                 // Bidiagonalize L in U.
1071                                                 impl.Dgebrd(m, m, u, ldu, s, work[ie:],
1072                                                         work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
1073
1074                                                 // Multiply right bidiagonalizing vectors in U by Q in VT.
1075                                                 impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
1076                                                         u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
1077
1078                                                 // Generate left bidiagonalizing vectors in U.
1079                                                 impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
1080                                                 iwork = ie + m
1081
1082                                                 // Perform bidiagonal QR iteration, computing left singular
1083                                                 // vectors of A in U and computing right singular vectors
1084                                                 // of A in VT.
1085                                                 impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:], vt, ldvt,
1086                                                         u, ldu, work, 1, work[iwork:])
1087                                         }
1088                                 }
1089                         } else if wantva {
1090                                 if wantun {
1091                                         // Path 7t.
1092                                         if lwork >= m*m+max(max(n+m, 4*m), bdspac) {
1093                                                 // Sufficient workspace for a fast algorithm.
1094                                                 ir := 0
1095                                                 var ldworkr int
1096                                                 if lwork >= wrkbl+lda*m {
1097                                                         ldworkr = lda
1098                                                 } else {
1099                                                         ldworkr = m
1100                                                 }
1101                                                 itau := ir + ldworkr*m
1102                                                 iwork := itau + m
1103
1104                                                 // Compute A = L*Q, copying result to VT.
1105                                                 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
1106                                                 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
1107
1108                                                 // Copy L to work[ir:], zeroing out above it.
1109                                                 impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr)
1110                                                 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr)
1111
1112                                                 // Generate Q in VT.
1113                                                 impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
1114
1115                                                 ie := itau
1116                                                 itauq := ie + m
1117                                                 itaup := itauq + m
1118                                                 iwork = itaup + m
1119
1120                                                 // Bidiagonalize L in work[ir:].
1121                                                 impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:],
1122                                                         work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
1123
1124                                                 // Generate right bidiagonalizing vectors in work[ir:].
1125                                                 impl.Dorgbr(lapack.ApplyP, m, m, m, work[ir:], ldworkr,
1126                                                         work[itaup:], work[iwork:], lwork-iwork)
1127                                                 iwork = ie + m
1128
1129                                                 // Perform bidiagonal QR iteration, computing right
1130                                                 // singular vectors of L in work[ir:].
1131                                                 ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:],
1132                                                         work[ir:], ldworkr, work, 1, work, 1, work[iwork:])
1133
1134                                                 // Multiply right singular vectors of L in work[ir:] by
1135                                                 // Q in VT, storing result in A.
1136                                                 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
1137                                                         work[ir:], ldworkr, vt, ldvt, 0, a, lda)
1138
1139                                                 // Copy right singular vectors of A from A to VT.
1140                                                 impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt)
1141                                         } else {
1142                                                 // Insufficient workspace for a fast algorithm.
1143                                                 itau := 0
1144                                                 iwork := itau + m
1145                                                 // Compute A = L * Q, copying result to VT.
1146                                                 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
1147                                                 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
1148
1149                                                 // Generate Q in VT.
1150                                                 impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
1151
1152                                                 ie := itau
1153                                                 itauq := ie + m
1154                                                 itaup := itauq + m
1155                                                 iwork = itaup + m
1156
1157                                                 // Zero out above L in A.
1158                                                 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
1159
1160                                                 // Bidiagonalize L in A.
1161                                                 impl.Dgebrd(m, m, a, lda, s, work[ie:], work[itauq:],
1162                                                         work[itaup:], work[iwork:], lwork-iwork)
1163
1164                                                 // Multiply right bidiagonalizing vectors in A by Q in VT.
1165                                                 impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
1166                                                         a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
1167                                                 iwork = ie + m
1168
1169                                                 // Perform bidiagonal QR iteration, computing right singular
1170                                                 // vectors of A in VT.
1171                                                 ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:],
1172                                                         vt, ldvt, work, 1, work, 1, work[iwork:])
1173                                         }
1174                                 } else if wantuo {
1175                                         panic(noSVDO)
1176                                 } else if wantuas {
1177                                         // Path 9t.
1178                                         if lwork >= m*m+max(max(m+n, 4*m), bdspac) {
1179                                                 // Sufficient workspace for a fast algorithm.
1180                                                 iu := 0
1181
1182                                                 var ldworku int
1183                                                 if lwork >= wrkbl+lda*m {
1184                                                         ldworku = lda
1185                                                 } else {
1186                                                         ldworku = m
1187                                                 }
1188                                                 itau := iu + ldworku*m
1189                                                 iwork := itau + m
1190
1191                                                 // Generate A = L * Q copying result to VT.
1192                                                 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
1193                                                 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
1194
1195                                                 // Generate Q in VT.
1196                                                 impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
1197
1198                                                 // Copy L to work[iu:], zeroing out above it.
1199                                                 impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku)
1200                                                 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku)
1201                                                 ie = itau
1202                                                 itauq := ie + m
1203                                                 itaup := itauq + m
1204                                                 iwork = itaup + m
1205
1206                                                 // Bidiagonalize L in work[iu:], copying result to U.
1207                                                 impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:],
1208                                                         work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
1209                                                 impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu)
1210
1211                                                 // Generate right bidiagonalizing vectors in work[iu:].
1212                                                 impl.Dorgbr(lapack.ApplyP, m, m, m, work[iu:], ldworku,
1213                                                         work[itaup:], work[iwork:], lwork-iwork)
1214
1215                                                 // Generate left bidiagonalizing vectors in U.
1216                                                 impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
1217                                                 iwork = ie + m
1218
1219                                                 // Perform bidiagonal QR iteration, computing left singular
1220                                                 // vectors of L in U and computing right singular vectors
1221                                                 // of L in work[iu:].
1222                                                 ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:],
1223                                                         work[iu:], ldworku, u, ldu, work, 1, work[iwork:])
1224
1225                                                 // Multiply right singular vectors of L in work[iu:]
1226                                                 // Q in VT, storing result in A.
1227                                                 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
1228                                                         work[iu:], ldworku, vt, ldvt, 0, a, lda)
1229
1230                                                 // Copy right singular vectors of A from A to VT.
1231                                                 impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt)
1232                                         } else {
1233                                                 // Insufficient workspace for a fast algorithm.
1234                                                 itau := 0
1235                                                 iwork := itau + m
1236
1237                                                 // Compute A = L * Q, copying result to VT.
1238                                                 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
1239                                                 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
1240
1241                                                 // Generate Q in VT.
1242                                                 impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
1243
1244                                                 // Copy L to U, zeroing out above it.
1245                                                 impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
1246                                                 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu)
1247
1248                                                 ie = itau
1249                                                 itauq := ie + m
1250                                                 itaup := itauq + m
1251                                                 iwork = itaup + m
1252
1253                                                 // Bidiagonalize L in U.
1254                                                 impl.Dgebrd(m, m, u, ldu, s, work[ie:], work[itauq:],
1255                                                         work[itaup:], work[iwork:], lwork-iwork)
1256
1257                                                 // Multiply right bidiagonalizing vectors in U by Q in VT.
1258                                                 impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
1259                                                         u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
1260
1261                                                 // Generate left bidiagonalizing vectors in U.
1262                                                 impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
1263                                                 iwork = ie + m
1264
1265                                                 // Perform bidiagonal QR iteration, computing left singular
1266                                                 // vectors of A in U and computing right singular vectors
1267                                                 // of A in VT.
1268                                                 ok = impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:],
1269                                                         vt, ldvt, u, ldu, work, 1, work[iwork:])
1270                                         }
1271                                 }
1272                         }
1273                 } else {
1274                         // Path 10t.
1275                         // N at least M, but not much larger.
1276                         ie = 0
1277                         itauq := ie + m
1278                         itaup := itauq + m
1279                         iwork := itaup + m
1280
1281                         // Bidiagonalize A.
1282                         impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
1283                         if wantuas {
1284                                 // If left singular vectors desired in U, copy result to U and
1285                                 // generate left bidiagonalizing vectors in U.
1286                                 impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
1287                                 impl.Dorgbr(lapack.ApplyQ, m, m, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
1288                         }
1289                         if wantvas {
1290                                 // If right singular vectors desired in VT, copy result to VT
1291                                 // and generate right bidiagonalizing vectors in VT.
1292                                 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
1293                                 var nrvt int
1294                                 if wantva {
1295                                         nrvt = n
1296                                 } else {
1297                                         nrvt = m
1298                                 }
1299                                 impl.Dorgbr(lapack.ApplyP, nrvt, n, m, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork)
1300                         }
1301                         if wantuo {
1302                                 panic(noSVDO)
1303                         }
1304                         if wantvo {
1305                                 panic(noSVDO)
1306                         }
1307                         iwork = ie + m
1308                         var nru, ncvt int
1309                         if wantuas || wantuo {
1310                                 nru = m
1311                         }
1312                         if wantvas || wantvo {
1313                                 ncvt = n
1314                         }
1315                         if !wantuo && !wantvo {
1316                                 // Perform bidiagonal QR iteration, if desired, computing left
1317                                 // singular vectors in U and computing right singular vectors in
1318                                 // VT.
1319                                 ok = impl.Dbdsqr(blas.Lower, m, ncvt, nru, 0, s, work[ie:],
1320                                         vt, ldvt, u, ldu, work, 1, work[iwork:])
1321                         } else {
1322                                 // There will be two branches when the implementation is complete.
1323                                 panic(noSVDO)
1324                         }
1325                 }
1326         }
1327         if !ok {
1328                 if ie > 1 {
1329                         for i := 0; i < minmn-1; i++ {
1330                                 work[i+1] = work[i+ie]
1331                         }
1332                 }
1333                 if ie < 1 {
1334                         for i := minmn - 2; i >= 0; i-- {
1335                                 work[i+1] = work[i+ie]
1336                         }
1337                 }
1338         }
1339         // Undo scaling if necessary.
1340         if iscl {
1341                 if anrm > bignum {
1342                         impl.Dlascl(lapack.General, 0, 0, bignum, anrm, minmn, 1, s, minmn)
1343                 }
1344                 if !ok && anrm > bignum {
1345                         impl.Dlascl(lapack.General, 0, 0, bignum, anrm, minmn-1, 1, work[minmn:], minmn)
1346                 }
1347                 if anrm < smlnum {
1348                         impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, minmn, 1, s, minmn)
1349                 }
1350                 if !ok && anrm < smlnum {
1351                         impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, minmn-1, 1, work[minmn:], minmn)
1352                 }
1353         }
1354         work[0] = float64(maxwrk)
1355         return ok
1356 }