1 // Copyright ©2014 The Gonum Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
8 "gonum.org/v1/gonum/blas"
9 "gonum.org/v1/gonum/internal/asm/f64"
12 var _ blas.Float64Level2 = Implementation{}
15 // y = alpha * A * x + beta * y if tA = blas.NoTrans
16 // y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans
17 // where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.
18 func (Implementation) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) {
19 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
41 if tA == blas.NoTrans {
45 if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) {
48 if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
51 if lda*(m-1)+n > len(a) || lda < max(1, n) {
55 // Quick return if possible
56 if m == 0 || n == 0 || (alpha == 0 && beta == 1) {
64 kx = -(lenX - 1) * incX
69 ky = -(lenY - 1) * incY
72 // First form y = beta * y
74 Implementation{}.Dscal(lenY, beta, y, incY)
76 Implementation{}.Dscal(lenY, beta, y, -incY)
83 // Form y = alpha * A * x + y
84 if tA == blas.NoTrans {
85 if incX == 1 && incY == 1 {
86 for i := 0; i < m; i++ {
87 y[i] += alpha * f64.DotUnitary(a[lda*i:lda*i+n], x)
92 for i := 0; i < m; i++ {
93 y[iy] += alpha * f64.DotInc(x, a[lda*i:lda*i+n], uintptr(n), uintptr(incX), 1, uintptr(kx), 0)
98 // Cases where a is transposed.
99 if incX == 1 && incY == 1 {
100 for i := 0; i < m; i++ {
103 f64.AxpyUnitaryTo(y, tmp, a[lda*i:lda*i+n], y)
109 for i := 0; i < m; i++ {
112 f64.AxpyInc(tmp, a[lda*i:lda*i+n], y, uintptr(n), 1, uintptr(incY), 0, uintptr(ky))
118 // Dger performs the rank-one operation
119 // A += alpha * x * y^T
120 // where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
121 func (Implementation) Dger(m, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) {
135 if (incX > 0 && (m-1)*incX >= len(x)) || (incX < 0 && (1-m)*incX >= len(x)) {
138 if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
141 if lda*(m-1)+n > len(a) || lda < max(1, n) {
148 // Quick return if possible
149 if m == 0 || n == 0 || alpha == 0 {
166 if incX == 1 && incY == 1 {
169 for i, xv := range x {
170 f64.AxpyUnitary(alpha*xv, y, a[i*lda:i*lda+n])
176 for i := 0; i < m; i++ {
177 f64.AxpyInc(alpha*x[ix], y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0)
183 // y = alpha * A * x + beta * y if tA == blas.NoTrans
184 // y = alpha * A^T * x + beta * y if tA == blas.Trans or blas.ConjTrans
185 // where a is an m×n band matrix kL subdiagonals and kU super-diagonals, and
186 // m and n refer to the size of the full dense matrix it represents.
187 // x and y are vectors, and alpha and beta are scalars.
188 func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) {
189 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
216 if tA == blas.NoTrans {
220 if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) {
223 if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
226 if lda*(min(m, n+kL)-1)+kL+kU+1 > len(a) || lda < kL+kU+1 {
230 // Quick return if possible
231 if m == 0 || n == 0 || (alpha == 0 && beta == 1) {
239 kx = -(lenX - 1) * incX
244 ky = -(lenY - 1) * incY
247 // First form y = beta * y
249 Implementation{}.Dscal(lenY, beta, y, incY)
251 Implementation{}.Dscal(lenY, beta, y, -incY)
258 // i and j are indices of the compacted banded matrix.
259 // off is the offset into the dense matrix (off + j = densej)
262 if tA == blas.NoTrans {
265 for i := 0; i < min(m, n+kL); i++ {
267 u := min(nCol, ld+kL-i)
269 atmp := a[i*lda+l : i*lda+u]
270 xtmp := x[off : off+u-l]
272 for j, v := range atmp {
280 for i := 0; i < min(m, n+kL); i++ {
282 u := min(nCol, ld+kL-i)
284 atmp := a[i*lda+l : i*lda+u]
287 for _, v := range atmp {
288 sum += x[off*incX+jx] * v
297 for i := 0; i < min(m, n+kL); i++ {
299 u := min(nCol, ld+kL-i)
301 atmp := a[i*lda+l : i*lda+u]
304 for _, v := range atmp {
305 y[jy+off*incY] += tmp * v
312 for i := 0; i < min(m, n+kL); i++ {
314 u := min(nCol, ld+kL-i)
316 atmp := a[i*lda+l : i*lda+u]
319 for _, v := range atmp {
320 y[jy+off*incY] += tmp * v
328 // x = A * x if tA == blas.NoTrans
329 // x = A^T * x if tA == blas.Trans or blas.ConjTrans
330 // A is an n×n Triangular matrix and x is a vector.
331 func (Implementation) Dtrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int) {
332 if ul != blas.Lower && ul != blas.Upper {
335 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
338 if d != blas.NonUnit && d != blas.Unit {
350 if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
353 if lda*(n-1)+n > len(a) || lda < max(1, n) {
359 nonUnit := d != blas.Unit
370 if tA == blas.NoTrans {
371 if ul == blas.Upper {
373 for i := 0; i < n; i++ {
377 tmp = a[ilda+i] * x[i]
382 x[i] = tmp + f64.DotUnitary(a[ilda+i+1:ilda+n], xtmp)
387 for i := 0; i < n; i++ {
391 tmp = a[ilda+i] * x[ix]
395 x[ix] = tmp + f64.DotInc(x, a[ilda+i+1:ilda+n], uintptr(n-i-1), uintptr(incX), 1, uintptr(ix+incX), 0)
401 for i := n - 1; i >= 0; i-- {
405 tmp += a[ilda+i] * x[i]
409 x[i] = tmp + f64.DotUnitary(a[ilda:ilda+i], x)
413 ix := kx + (n-1)*incX
414 for i := n - 1; i >= 0; i-- {
418 tmp = a[ilda+i] * x[ix]
422 x[ix] = tmp + f64.DotInc(x, a[ilda:ilda+i], uintptr(i), uintptr(incX), 1, uintptr(kx), 0)
427 // Cases where a is transposed.
428 if ul == blas.Upper {
430 for i := n - 1; i >= 0; i-- {
433 f64.AxpyUnitary(xi, a[ilda+i+1:ilda+n], x[i+1:n])
440 ix := kx + (n-1)*incX
441 for i := n - 1; i >= 0; i-- {
444 f64.AxpyInc(xi, a[ilda+i+1:ilda+n], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(kx+(i+1)*incX))
453 for i := 0; i < n; i++ {
456 f64.AxpyUnitary(xi, a[ilda:ilda+i], x)
464 for i := 0; i < n; i++ {
467 f64.AxpyInc(xi, a[ilda:ilda+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx))
476 // A * x = b if tA == blas.NoTrans
477 // A^T * x = b if tA == blas.Trans or blas.ConjTrans
478 // A is an n×n triangular matrix and x is a vector.
479 // At entry to the function, x contains the values of b, and the result is
480 // stored in place into x.
482 // No test for singularity or near-singularity is included in this
483 // routine. Such tests must be performed before calling this routine.
484 func (Implementation) Dtrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int) {
485 // Test the input parameters
487 if ul != blas.Lower && ul != blas.Upper {
490 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
493 if d != blas.NonUnit && d != blas.Unit {
499 if lda*(n-1)+n > len(a) || lda < max(1, n) {
505 if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
508 // Quick return if possible
513 if d == blas.NonUnit {
523 nonUnit := d == blas.NonUnit
524 if tA == blas.NoTrans {
525 if ul == blas.Upper {
527 for i := n - 1; i >= 0; i-- {
529 atmp := a[i*lda+i+1 : i*lda+n]
530 for j, v := range atmp {
541 ix := kx + (n-1)*incX
542 for i := n - 1; i >= 0; i-- {
545 atmp := a[i*lda+i+1 : i*lda+n]
546 for _, v := range atmp {
559 for i := 0; i < n; i++ {
561 atmp := a[i*lda : i*lda+i]
562 for j, v := range atmp {
573 for i := 0; i < n; i++ {
576 atmp := a[i*lda : i*lda+i]
577 for _, v := range atmp {
589 // Cases where a is transposed.
590 if ul == blas.Upper {
592 for i := 0; i < n; i++ {
597 atmp := a[i*lda+i+1 : i*lda+n]
598 for j, v := range atmp {
606 for i := 0; i < n; i++ {
611 jx := kx + (i+1)*incX
612 atmp := a[i*lda+i+1 : i*lda+n]
613 for _, v := range atmp {
622 for i := n - 1; i >= 0; i-- {
627 atmp := a[i*lda : i*lda+i]
628 for j, v := range atmp {
634 ix := kx + (n-1)*incX
635 for i := n - 1; i >= 0; i-- {
641 atmp := a[i*lda : i*lda+i]
642 for _, v := range atmp {
651 // y = alpha * A * x + beta * y,
652 // where a is an n×n symmetric matrix, x and y are vectors, and alpha and
654 func (Implementation) Dsymv(ul blas.Uplo, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) {
656 if ul != blas.Lower && ul != blas.Upper {
662 if lda > 1 && lda < n {
671 if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
674 if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
677 if lda*(n-1)+n > len(a) || lda < max(1, n) {
680 // Quick return if possible
681 if n == 0 || (alpha == 0 && beta == 1) {
685 // Set up start points
701 Implementation{}.Dscal(n, beta, y, incY)
703 Implementation{}.Dscal(n, beta, y, -incY)
712 y[0] += alpha * a[0] * x[0]
716 if ul == blas.Upper {
719 for i := 0; i < n; i++ {
721 sum := x[i] * a[i*lda+i]
722 jy := ky + (i+1)*incY
723 atmp := a[i*lda+i+1 : i*lda+n]
724 for j, v := range atmp {
737 for i := 0; i < n; i++ {
739 sum := x[ix] * a[i*lda+i]
740 jx := kx + (i+1)*incX
741 jy := ky + (i+1)*incY
742 atmp := a[i*lda+i+1 : i*lda+n]
743 for _, v := range atmp {
755 // Cases where a is lower triangular.
758 for i := 0; i < n; i++ {
761 atmp := a[i*lda : i*lda+i]
763 for j, v := range atmp {
768 sum += x[i] * a[i*lda+i]
777 for i := 0; i < n; i++ {
781 atmp := a[i*lda : i*lda+i]
783 for _, v := range atmp {
789 sum += x[ix] * a[i*lda+i]
798 // x = A * x if tA == blas.NoTrans
799 // x = A^T * x if tA == blas.Trans or blas.ConjTrans
800 // where A is an n×n triangular banded matrix with k diagonals, and x is a vector.
801 func (Implementation) Dtbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int) {
802 if ul != blas.Lower && ul != blas.Upper {
805 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
808 if d != blas.NonUnit && d != blas.Unit {
817 if lda*(n-1)+k+1 > len(a) || lda < k+1 {
823 if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
832 } else if incX != 1 {
836 nonunit := d != blas.Unit
838 if tA == blas.NoTrans {
839 if ul == blas.Upper {
841 for i := 0; i < n; i++ {
846 for j := 1; j < u; j++ {
847 sum += xtmp[j] * atmp[j]
850 sum += xtmp[0] * atmp[0]
859 for i := 0; i < n; i++ {
864 for j := 1; j < u; j++ {
865 sum += x[ix+jx] * atmp[j]
869 sum += x[ix] * atmp[0]
879 for i := n - 1; i >= 0; i-- {
883 for j := l; j < k; j++ {
884 sum += x[i-k+j] * atmp[j]
887 sum += x[i] * atmp[k]
895 ix := kx + (n-1)*incX
896 for i := n - 1; i >= 0; i-- {
901 for j := l; j < k; j++ {
902 sum += x[ix-k*incX+jx] * atmp[j]
906 sum += x[ix] * atmp[k]
915 if ul == blas.Upper {
917 for i := n - 1; i >= 0; i-- {
923 for j := 1; j < u; j++ {
924 sum += x[i-j] * a[(i-j)*lda+j]
927 sum += x[i] * a[i*lda]
935 ix := kx + (n-1)*incX
936 for i := n - 1; i >= 0; i-- {
943 for j := 1; j < u; j++ {
944 sum += x[ix-jx] * a[(i-j)*lda+j]
948 sum += x[ix] * a[i*lda]
958 for i := 0; i < n; i++ {
964 for j := 0; j < u; j++ {
965 sum += x[i+j+1] * a[(i+j+1)*lda+k-j-1]
968 sum += x[i] * a[i*lda+k]
977 for i := 0; i < n; i++ {
986 for j := 0; j < u; j++ {
987 sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1]
991 sum += x[ix] * a[i*lda+k]
1001 // x = A * x if tA == blas.NoTrans
1002 // x = A^T * x if tA == blas.Trans or blas.ConjTrans
1003 // where A is an n×n unit triangular matrix in packed format, and x is a vector.
1004 func (Implementation) Dtpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float64, x []float64, incX int) {
1006 if ul != blas.Lower && ul != blas.Upper {
1009 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
1012 if d != blas.NonUnit && d != blas.Unit {
1018 if len(ap) < (n*(n+1))/2 {
1024 if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
1032 kx = -(n - 1) * incX
1035 nonUnit := d == blas.NonUnit
1036 var offset int // Offset is the index of (i,i)
1037 if tA == blas.NoTrans {
1038 if ul == blas.Upper {
1040 for i := 0; i < n; i++ {
1045 atmp := ap[offset+1 : offset+n-i]
1047 for j, v := range atmp {
1056 for i := 0; i < n; i++ {
1061 atmp := ap[offset+1 : offset+n-i]
1062 jx := kx + (i+1)*incX
1063 for _, v := range atmp {
1074 offset = n*(n+1)/2 - 1
1075 for i := n - 1; i >= 0; i-- {
1080 atmp := ap[offset-i : offset]
1081 for j, v := range atmp {
1089 ix := kx + (n-1)*incX
1090 offset = n*(n+1)/2 - 1
1091 for i := n - 1; i >= 0; i-- {
1096 atmp := ap[offset-i : offset]
1098 for _, v := range atmp {
1108 // Cases where ap is transposed.
1109 if ul == blas.Upper {
1111 offset = n*(n+1)/2 - 1
1112 for i := n - 1; i >= 0; i-- {
1114 atmp := ap[offset+1 : offset+n-i]
1116 for j, v := range atmp {
1126 ix := kx + (n-1)*incX
1127 offset = n*(n+1)/2 - 1
1128 for i := n - 1; i >= 0; i-- {
1130 jx := kx + (i+1)*incX
1131 atmp := ap[offset+1 : offset+n-i]
1132 for _, v := range atmp {
1145 for i := 0; i < n; i++ {
1147 atmp := ap[offset-i : offset]
1148 for j, v := range atmp {
1159 for i := 0; i < n; i++ {
1162 atmp := ap[offset-i : offset]
1163 for _, v := range atmp {
1177 // where A is an n×n triangular banded matrix with k diagonals in packed format,
1178 // and x is a vector.
1179 // At entry to the function, x contains the values of b, and the result is
1180 // stored in place into x.
1182 // No test for singularity or near-singularity is included in this
1183 // routine. Such tests must be performed before calling this routine.
1184 func (Implementation) Dtbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int) {
1185 if ul != blas.Lower && ul != blas.Upper {
1188 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
1191 if d != blas.NonUnit && d != blas.Unit {
1197 if lda*(n-1)+k+1 > len(a) || lda < k+1 {
1203 if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
1211 kx = -(n - 1) * incX
1215 nonUnit := d == blas.NonUnit
1217 // Several cases below use subslices for speed improvement.
1218 // The incX != 1 cases usually do not because incX may be negative.
1219 if tA == blas.NoTrans {
1220 if ul == blas.Upper {
1222 for i := n - 1; i >= 0; i-- {
1228 xtmp := x[i+1 : i+bands+1]
1230 for j, v := range xtmp {
1240 ix := kx + (n-1)*incX
1241 for i := n - 1; i >= 0; i-- {
1251 for j := 1; j < max; j++ {
1253 sum += x[ix+jx] * atmp[j]
1264 for i := 0; i < n; i++ {
1269 atmp := a[i*lda+k-bands:]
1270 xtmp := x[i-bands : i]
1272 for j, v := range xtmp {
1283 for i := 0; i < n; i++ {
1288 atmp := a[i*lda+k-bands:]
1293 for j := 0; j < bands; j++ {
1294 sum += x[ix-bands*incX+jx] * atmp[j]
1299 x[ix] /= atmp[bands]
1305 // Cases where a is transposed.
1306 if ul == blas.Upper {
1308 for i := 0; i < n; i++ {
1314 for j := 0; j < bands; j++ {
1315 sum += x[i-bands+j] * a[(i-bands+j)*lda+bands-j]
1325 for i := 0; i < n; i++ {
1334 for j := 0; j < bands; j++ {
1335 sum += x[ix-bands*incX+jx] * a[(i-bands+j)*lda+bands-j]
1347 for i := n - 1; i >= 0; i-- {
1353 xtmp := x[i+1 : i+1+bands]
1354 for j, v := range xtmp {
1355 sum += v * a[(i+j+1)*lda+k-j-1]
1364 ix := kx + (n-1)*incX
1365 for i := n - 1; i >= 0; i-- {
1374 for j := 0; j < bands; j++ {
1375 sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1]
1387 // y = alpha * A * x + beta * y
1388 // where A is an n×n symmetric banded matrix, x and y are vectors, and alpha
1389 // and beta are scalars.
1390 func (Implementation) Dsbmv(ul blas.Uplo, n, k int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) {
1391 if ul != blas.Lower && ul != blas.Upper {
1404 if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
1407 if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
1410 if lda*(n-1)+k+1 > len(a) || lda < k+1 {
1414 // Quick return if possible
1415 if n == 0 || (alpha == 0 && beta == 1) {
1426 kx = -(lenX - 1) * incX
1431 ky = -(lenY - 1) * incY
1434 // First form y = beta * y
1436 Implementation{}.Dscal(lenY, beta, y, incY)
1438 Implementation{}.Dscal(lenY, beta, y, -incY)
1445 if ul == blas.Upper {
1448 for i := 0; i < n; i++ {
1451 sum := tmp * atmp[0]
1454 for j := 1; j <= u; j++ {
1456 sum += alpha * x[i+j] * v
1467 for i := 0; i < n; i++ {
1469 tmp := alpha * x[ix]
1470 sum := tmp * atmp[0]
1474 for j := 1; j <= u; j++ {
1476 sum += alpha * x[ix+jx] * v
1488 // Casses where a has bands below the diagonal.
1491 for i := 0; i < n; i++ {
1496 for j := l; j < k; j++ {
1498 y[iy] += alpha * v * x[i-k+j]
1499 y[iy-k*incY+jy] += tmp * v
1502 y[iy] += tmp * atmp[k]
1509 for i := 0; i < n; i++ {
1511 tmp := alpha * x[ix]
1515 for j := l; j < k; j++ {
1517 y[iy] += alpha * v * x[ix-k*incX+jx]
1518 y[iy-k*incY+jy] += tmp * v
1522 y[iy] += tmp * atmp[k]
1528 // Dsyr performs the rank-one update
1529 // a += alpha * x * x^T
1530 // where a is an n×n symmetric matrix, and x is a vector.
1531 func (Implementation) Dsyr(ul blas.Uplo, n int, alpha float64, x []float64, incX int, a []float64, lda int) {
1532 if ul != blas.Lower && ul != blas.Upper {
1541 if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
1544 if lda*(n-1)+n > len(a) || lda < max(1, n) {
1547 if alpha == 0 || n == 0 {
1556 kx = -(lenX - 1) * incX
1558 if ul == blas.Upper {
1560 for i := 0; i < n; i++ {
1563 atmp := a[i*lda+i : i*lda+n]
1565 for j, v := range xtmp {
1573 for i := 0; i < n; i++ {
1574 tmp := x[ix] * alpha
1578 for j := i; j < n; j++ {
1579 atmp[j] += x[jx] * tmp
1587 // Cases where a is lower triangular.
1589 for i := 0; i < n; i++ {
1594 for j, v := range xtmp {
1602 for i := 0; i < n; i++ {
1603 tmp := x[ix] * alpha
1607 for j := 0; j < i+1; j++ {
1608 atmp[j] += tmp * x[jx]
1616 // Dsyr2 performs the symmetric rank-two update
1617 // A += alpha * x * y^T + alpha * y * x^T
1618 // where A is a symmetric n×n matrix, x and y are vectors, and alpha is a scalar.
1619 func (Implementation) Dsyr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) {
1620 if ul != blas.Lower && ul != blas.Upper {
1632 if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
1635 if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
1638 if lda*(n-1)+n > len(a) || lda < max(1, n) {
1649 ky = -(n - 1) * incY
1654 kx = -(n - 1) * incX
1656 if ul == blas.Upper {
1657 if incX == 1 && incY == 1 {
1658 for i := 0; i < n; i++ {
1662 for j := i; j < n; j++ {
1663 atmp[j] += alpha * (xi*y[j] + x[j]*yi)
1670 for i := 0; i < n; i++ {
1676 for j := i; j < n; j++ {
1677 atmp[j] += alpha * (xi*y[jy] + x[jx]*yi)
1686 if incX == 1 && incY == 1 {
1687 for i := 0; i < n; i++ {
1691 for j := 0; j <= i; j++ {
1692 atmp[j] += alpha * (xi*y[j] + x[j]*yi)
1699 for i := 0; i < n; i++ {
1705 for j := 0; j <= i; j++ {
1706 atmp[j] += alpha * (xi*y[jy] + x[jx]*yi)
1716 // A * x = b if tA == blas.NoTrans
1717 // A^T * x = b if tA == blas.Trans or blas.ConjTrans
1718 // where A is an n×n triangular matrix in packed format and x is a vector.
1719 // At entry to the function, x contains the values of b, and the result is
1720 // stored in place into x.
1722 // No test for singularity or near-singularity is included in this
1723 // routine. Such tests must be performed before calling this routine.
1724 func (Implementation) Dtpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float64, x []float64, incX int) {
1726 if ul != blas.Lower && ul != blas.Upper {
1729 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
1732 if d != blas.NonUnit && d != blas.Unit {
1738 if len(ap) < (n*(n+1))/2 {
1744 if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
1752 kx = -(n - 1) * incX
1755 nonUnit := d == blas.NonUnit
1756 var offset int // Offset is the index of (i,i)
1757 if tA == blas.NoTrans {
1758 if ul == blas.Upper {
1759 offset = n*(n+1)/2 - 1
1761 for i := n - 1; i >= 0; i-- {
1762 atmp := ap[offset+1 : offset+n-i]
1765 for j, v := range atmp {
1776 ix := kx + (n-1)*incX
1777 for i := n - 1; i >= 0; i-- {
1778 atmp := ap[offset+1 : offset+n-i]
1779 jx := kx + (i+1)*incX
1781 for _, v := range atmp {
1795 for i := 0; i < n; i++ {
1796 atmp := ap[offset-i : offset]
1798 for j, v := range atmp {
1810 for i := 0; i < n; i++ {
1812 atmp := ap[offset-i : offset]
1814 for _, v := range atmp {
1827 // Cases where ap is transposed.
1828 if ul == blas.Upper {
1830 for i := 0; i < n; i++ {
1835 atmp := ap[offset+1 : offset+n-i]
1837 for j, v := range atmp {
1845 for i := 0; i < n; i++ {
1850 atmp := ap[offset+1 : offset+n-i]
1851 jx := kx + (i+1)*incX
1852 for _, v := range atmp {
1862 offset = n*(n+1)/2 - 1
1863 for i := n - 1; i >= 0; i-- {
1868 atmp := ap[offset-i : offset]
1869 for j, v := range atmp {
1876 ix := kx + (n-1)*incX
1877 offset = n*(n+1)/2 - 1
1878 for i := n - 1; i >= 0; i-- {
1883 atmp := ap[offset-i : offset]
1885 for _, v := range atmp {
1895 // y = alpha * A * x + beta * y,
1896 // where A is an n×n symmetric matrix in packed format, x and y are vectors
1897 // and alpha and beta are scalars.
1898 func (Implementation) Dspmv(ul blas.Uplo, n int, alpha float64, a []float64, x []float64, incX int, beta float64, y []float64, incY int) {
1900 if ul != blas.Lower && ul != blas.Upper {
1906 if len(a) < (n*(n+1))/2 {
1915 if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
1918 if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
1921 // Quick return if possible
1922 if n == 0 || (alpha == 0 && beta == 1) {
1926 // Set up start points
1931 kx = -(n - 1) * incX
1936 ky = -(n - 1) * incY
1939 // Form y = beta * y
1942 Implementation{}.Dscal(n, beta, y, incY)
1944 Implementation{}.Dscal(n, beta, y, -incY)
1953 y[0] += alpha * a[0] * x[0]
1956 var offset int // Offset is the index of (i,i).
1957 if ul == blas.Upper {
1960 for i := 0; i < n; i++ {
1962 sum := a[offset] * x[i]
1963 atmp := a[offset+1 : offset+n-i]
1965 jy := ky + (i+1)*incY
1966 for j, v := range atmp {
1971 y[iy] += alpha * sum
1979 for i := 0; i < n; i++ {
1981 sum := a[offset] * x[ix]
1982 atmp := a[offset+1 : offset+n-i]
1983 jx := kx + (i+1)*incX
1984 jy := ky + (i+1)*incY
1985 for _, v := range atmp {
1991 y[iy] += alpha * sum
2000 for i := 0; i < n; i++ {
2002 atmp := a[offset-i : offset]
2005 for j, v := range atmp {
2010 sum += a[offset] * x[i]
2011 y[iy] += alpha * sum
2019 for i := 0; i < n; i++ {
2021 atmp := a[offset-i : offset]
2025 for _, v := range atmp {
2032 sum += a[offset] * x[ix]
2033 y[iy] += alpha * sum
2040 // Dspr computes the rank-one operation
2041 // a += alpha * x * x^T
2042 // where a is an n×n symmetric matrix in packed format, x is a vector, and
2043 // alpha is a scalar.
2044 func (Implementation) Dspr(ul blas.Uplo, n int, alpha float64, x []float64, incX int, a []float64) {
2045 if ul != blas.Lower && ul != blas.Upper {
2054 if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
2057 if len(a) < (n*(n+1))/2 {
2060 if alpha == 0 || n == 0 {
2068 kx = -(lenX - 1) * incX
2070 var offset int // Offset is the index of (i,i).
2071 if ul == blas.Upper {
2073 for i := 0; i < n; i++ {
2077 for j, v := range xtmp {
2085 for i := 0; i < n; i++ {
2089 for j := 0; j < n-i; j++ {
2090 atmp[j] += xv * x[jx]
2099 for i := 0; i < n; i++ {
2100 atmp := a[offset-i:]
2103 for j, v := range xtmp {
2111 for i := 0; i < n; i++ {
2113 atmp := a[offset-i:]
2115 for j := 0; j <= i; j++ {
2116 atmp[j] += xv * x[jx]
2124 // Dspr2 performs the symmetric rank-2 update
2125 // A += alpha * x * y^T + alpha * y * x^T,
2126 // where A is an n×n symmetric matrix in packed format, x and y are vectors,
2127 // and alpha is a scalar.
2128 func (Implementation) Dspr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, ap []float64) {
2129 if ul != blas.Lower && ul != blas.Upper {
2141 if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
2144 if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
2147 if len(ap) < (n*(n+1))/2 {
2157 ky = -(n - 1) * incY
2162 kx = -(n - 1) * incX
2164 var offset int // Offset is the index of (i,i).
2165 if ul == blas.Upper {
2166 if incX == 1 && incY == 1 {
2167 for i := 0; i < n; i++ {
2173 for j, v := range xtmp {
2174 atmp[j] += alpha * (xi*ytmp[j] + v*yi)
2182 for i := 0; i < n; i++ {
2188 for j := 0; j < n-i; j++ {
2189 atmp[j] += alpha * (xi*y[jy] + x[jx]*yi)
2199 if incX == 1 && incY == 1 {
2200 for i := 0; i < n; i++ {
2201 atmp := ap[offset-i:]
2205 for j, v := range xtmp {
2206 atmp[j] += alpha * (xi*y[j] + v*yi)
2214 for i := 0; i < n; i++ {
2217 atmp := ap[offset-i:]
2218 for j := 0; j <= i; j++ {
2219 atmp[j] += alpha * (x[ix]*y[jy] + x[jx]*y[iy])