1 // Copyright ©2016 The Gonum Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "gonum.org/v1/gonum/blas"
9 "gonum.org/v1/gonum/blas/blas64"
12 // Dsytrd reduces a symmetric n×n matrix A to symmetric tridiagonal form by an
13 // orthogonal similarity transformation
15 // where Q is an orthonormal matrix and T is symmetric and tridiagonal.
17 // On entry, a contains the elements of the input matrix in the triangle specified
18 // by uplo. On exit, the diagonal and sub/super-diagonal are overwritten by the
19 // corresponding elements of the tridiagonal matrix T. The remaining elements in
20 // the triangle, along with the array tau, contain the data to construct Q as
21 // the product of elementary reflectors.
23 // If uplo == blas.Upper, Q is constructed with
24 // Q = H_{n-2} * ... * H_1 * H_0
26 // H_i = I - tau_i * v * v^T
27 // v is constructed as v[i+1:n] = 0, v[i] = 1, v[0:i-1] is stored in A[0:i-1, i+1].
28 // The elements of A are
35 // If uplo == blas.Lower, Q is constructed with
36 // Q = H_0 * H_1 * ... * H_{n-2}
38 // H_i = I - tau_i * v * v^T
39 // v is constructed as v[0:i+1] = 0, v[i+1] = 1, v[i+2:n] is stored in A[i+2:n, i].
40 // The elements of A are
47 // d must have length n, and e and tau must have length n-1. Dsytrd will panic if
48 // these conditions are not met.
50 // work is temporary storage, and lwork specifies the usable memory length. At minimum,
51 // lwork >= 1, and Dsytrd will panic otherwise. The amount of blocking is
52 // limited by the usable length.
53 // If lwork == -1, instead of computing Dsytrd the optimal work length is stored
56 // Dsytrd is an internal routine. It is exported for testing purposes.
57 func (impl Implementation) Dsytrd(uplo blas.Uplo, n int, a []float64, lda int, d, e, tau, work []float64, lwork int) {
58 checkMatrix(n, n, a, lda)
68 if len(work) < lwork {
71 if lwork != -1 && lwork < 1 {
92 nb := impl.Ilaenv(1, "DSYTRD", opts, n, -1, -1, -1)
95 work[0] = float64(lworkopt)
100 bi := blas64.Implementation()
102 if 1 < nb && nb < n {
103 // Determine when to cross over from blocked to unblocked code. The last
104 // block is always handled by unblocked code.
109 nx = max(nb, impl.Ilaenv(3, "DSYTRD", opts, n, -1, -1, -1))
111 // Determine if workspace is large enough for blocked code.
115 // Not enough workspace to use optimal nb: determine the minimum
116 // value of nb and reduce nb or force use of unblocked code by
119 nbmin := impl.Ilaenv(2, "DSYTRD", opts, n, -1, -1, -1)
133 // Reduce the upper triangle of A. Columns 0:kk are handled by the
136 kk := n - ((n-nx+nb-1)/nb)*nb
137 for i = n - nb; i >= kk; i -= nb {
138 // Reduce columns i:i+nb to tridiagonal form and form the matrix W
139 // which is needed to update the unreduced part of the matrix.
140 impl.Dlatrd(uplo, i+nb, nb, a, lda, e, tau, work, ldwork)
142 // Update the unreduced submatrix A[0:i-1,0:i-1], using an update
143 // of the form A = A - V*W^T - W*V^T.
144 bi.Dsyr2k(uplo, blas.NoTrans, i, nb, -1, a[i:], lda, work, ldwork, 1, a, lda)
146 // Copy superdiagonal elements back into A, and diagonal elements into D.
147 for j := i; j < i+nb; j++ {
148 a[(j-1)*lda+j] = e[j-1]
152 // Use unblocked code to reduce the last or only block
153 // check that i == kk.
154 impl.Dsytd2(uplo, kk, a, lda, d, e, tau)
157 // Reduce the lower triangle of A.
158 for i = 0; i < n-nx; i += nb {
159 // Reduce columns 0:i+nb to tridiagonal form and form the matrix W
160 // which is needed to update the unreduced part of the matrix.
161 impl.Dlatrd(uplo, n-i, nb, a[i*lda+i:], lda, e[i:], tau[i:], work, ldwork)
163 // Update the unreduced submatrix A[i+ib:n, i+ib:n], using an update
164 // of the form A = A + V*W^T - W*V^T.
165 bi.Dsyr2k(uplo, blas.NoTrans, n-i-nb, nb, -1, a[(i+nb)*lda+i:], lda,
166 work[nb*ldwork:], ldwork, 1, a[(i+nb)*lda+i+nb:], lda)
168 // Copy subdiagonal elements back into A, and diagonal elements into D.
169 for j := i; j < i+nb; j++ {
170 a[(j+1)*lda+j] = e[j]
174 // Use unblocked code to reduce the last or only block.
175 impl.Dsytd2(uplo, n-i, a[i*lda+i:], lda, d[i:], e[i:], tau[i:])
177 work[0] = float64(lworkopt)