X-Git-Url: http://git.osdn.net/view?p=bytom%2Fvapor.git;a=blobdiff_plain;f=vendor%2Fgonum.org%2Fv1%2Fgonum%2Fblas%2Fgonum%2Flevel3double.go;fp=vendor%2Fgonum.org%2Fv1%2Fgonum%2Fblas%2Fgonum%2Flevel3double.go;h=55822463051e1b56c957c6a44c1b746b4af872ce;hp=0000000000000000000000000000000000000000;hb=db158dcf09436b003defd333f1a665e7e051d820;hpb=d09b7a78d44dc259725902b8141cdba0d716b121 diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/level3double.go b/vendor/gonum.org/v1/gonum/blas/gonum/level3double.go new file mode 100644 index 00000000..55822463 --- /dev/null +++ b/vendor/gonum.org/v1/gonum/blas/gonum/level3double.go @@ -0,0 +1,831 @@ +// Copyright ©2014 The Gonum Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gonum + +import ( + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/internal/asm/f64" +) + +var _ blas.Float64Level3 = Implementation{} + +// Dtrsm solves +// A * X = alpha * B, if tA == blas.NoTrans side == blas.Left, +// A^T * X = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Left, +// X * A = alpha * B, if tA == blas.NoTrans side == blas.Right, +// X * A^T = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Right, +// where A is an n×n or m×m triangular matrix, X is an m×n matrix, and alpha is a +// scalar. +// +// At entry to the function, X contains the values of B, and the result is +// stored in place into X. +// +// No check is made that A is invertible. +func (Implementation) Dtrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) { + if s != blas.Left && s != blas.Right { + panic(badSide) + } + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + if ldb < n { + panic(badLdB) + } + var k int + if s == blas.Left { + k = m + } else { + k = n + } + if lda*(k-1)+k > len(a) || lda < max(1, k) { + panic(badLdA) + } + if ldb*(m-1)+n > len(b) || ldb < max(1, n) { + panic(badLdB) + } + + if m == 0 || n == 0 { + return + } + + if alpha == 0 { + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for j := range btmp { + btmp[j] = 0 + } + } + return + } + nonUnit := d == blas.NonUnit + if s == blas.Left { + if tA == blas.NoTrans { + if ul == blas.Upper { + for i := m - 1; i >= 0; i-- { + btmp := b[i*ldb : i*ldb+n] + if alpha != 1 { + for j := range btmp { + btmp[j] *= alpha + } + } + for ka, va := range a[i*lda+i+1 : i*lda+m] { + k := ka + i + 1 + if va != 0 { + f64.AxpyUnitaryTo(btmp, -va, b[k*ldb:k*ldb+n], btmp) + } + } + if nonUnit { + tmp := 1 / a[i*lda+i] + for j := 0; j < n; j++ { + btmp[j] *= tmp + } + } + } + return + } + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + if alpha != 1 { + for j := 0; j < n; j++ { + btmp[j] *= alpha + } + } + for k, va := range a[i*lda : i*lda+i] { + if va != 0 { + f64.AxpyUnitaryTo(btmp, -va, b[k*ldb:k*ldb+n], btmp) + } + } + if nonUnit { + tmp := 1 / a[i*lda+i] + for j := 0; j < n; j++ { + btmp[j] *= tmp + } + } + } + return + } + // Cases where a is transposed + if ul == blas.Upper { + for k := 0; k < m; k++ { + btmpk := b[k*ldb : k*ldb+n] + if nonUnit { + tmp := 1 / a[k*lda+k] + for j := 0; j < n; j++ { + btmpk[j] *= tmp + } + } + for ia, va := range a[k*lda+k+1 : k*lda+m] { + i := ia + k + 1 + if va != 0 { + btmp := b[i*ldb : i*ldb+n] + f64.AxpyUnitaryTo(btmp, -va, btmpk, btmp) + } + } + if alpha != 1 { + for j := 0; j < n; j++ { + btmpk[j] *= alpha + } + } + } + return + } + for k := m - 1; k >= 0; k-- { + btmpk := b[k*ldb : k*ldb+n] + if nonUnit { + tmp := 1 / a[k*lda+k] + for j := 0; j < n; j++ { + btmpk[j] *= tmp + } + } + for i, va := range a[k*lda : k*lda+k] { + if va != 0 { + btmp := b[i*ldb : i*ldb+n] + f64.AxpyUnitaryTo(btmp, -va, btmpk, btmp) + } + } + if alpha != 1 { + for j := 0; j < n; j++ { + btmpk[j] *= alpha + } + } + } + return + } + // Cases where a is to the right of X. + if tA == blas.NoTrans { + if ul == blas.Upper { + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + if alpha != 1 { + for j := 0; j < n; j++ { + btmp[j] *= alpha + } + } + for k, vb := range btmp { + if vb != 0 { + if btmp[k] != 0 { + if nonUnit { + btmp[k] /= a[k*lda+k] + } + btmpk := btmp[k+1 : n] + f64.AxpyUnitaryTo(btmpk, -btmp[k], a[k*lda+k+1:k*lda+n], btmpk) + } + } + } + } + return + } + for i := 0; i < m; i++ { + btmp := b[i*lda : i*lda+n] + if alpha != 1 { + for j := 0; j < n; j++ { + btmp[j] *= alpha + } + } + for k := n - 1; k >= 0; k-- { + if btmp[k] != 0 { + if nonUnit { + btmp[k] /= a[k*lda+k] + } + f64.AxpyUnitaryTo(btmp, -btmp[k], a[k*lda:k*lda+k], btmp) + } + } + } + return + } + // Cases where a is transposed. + if ul == blas.Upper { + for i := 0; i < m; i++ { + btmp := b[i*lda : i*lda+n] + for j := n - 1; j >= 0; j-- { + tmp := alpha*btmp[j] - f64.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:]) + if nonUnit { + tmp /= a[j*lda+j] + } + btmp[j] = tmp + } + } + return + } + for i := 0; i < m; i++ { + btmp := b[i*lda : i*lda+n] + for j := 0; j < n; j++ { + tmp := alpha*btmp[j] - f64.DotUnitary(a[j*lda:j*lda+j], btmp) + if nonUnit { + tmp /= a[j*lda+j] + } + btmp[j] = tmp + } + } +} + +// Dsymm performs one of +// C = alpha * A * B + beta * C, if side == blas.Left, +// C = alpha * B * A + beta * C, if side == blas.Right, +// where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha +// is a scalar. +func (Implementation) Dsymm(s blas.Side, ul blas.Uplo, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) { + if s != blas.Right && s != blas.Left { + panic("goblas: bad side") + } + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + var k int + if s == blas.Left { + k = m + } else { + k = n + } + if lda*(k-1)+k > len(a) || lda < max(1, k) { + panic(badLdA) + } + if ldb*(m-1)+n > len(b) || ldb < max(1, n) { + panic(badLdB) + } + if ldc*(m-1)+n > len(c) || ldc < max(1, n) { + panic(badLdC) + } + if m == 0 || n == 0 { + return + } + if alpha == 0 && beta == 1 { + return + } + if alpha == 0 { + if beta == 0 { + for i := 0; i < m; i++ { + ctmp := c[i*ldc : i*ldc+n] + for j := range ctmp { + ctmp[j] = 0 + } + } + return + } + for i := 0; i < m; i++ { + ctmp := c[i*ldc : i*ldc+n] + for j := 0; j < n; j++ { + ctmp[j] *= beta + } + } + return + } + + isUpper := ul == blas.Upper + if s == blas.Left { + for i := 0; i < m; i++ { + atmp := alpha * a[i*lda+i] + btmp := b[i*ldb : i*ldb+n] + ctmp := c[i*ldc : i*ldc+n] + for j, v := range btmp { + ctmp[j] *= beta + ctmp[j] += atmp * v + } + + for k := 0; k < i; k++ { + var atmp float64 + if isUpper { + atmp = a[k*lda+i] + } else { + atmp = a[i*lda+k] + } + atmp *= alpha + ctmp := c[i*ldc : i*ldc+n] + f64.AxpyUnitaryTo(ctmp, atmp, b[k*ldb:k*ldb+n], ctmp) + } + for k := i + 1; k < m; k++ { + var atmp float64 + if isUpper { + atmp = a[i*lda+k] + } else { + atmp = a[k*lda+i] + } + atmp *= alpha + ctmp := c[i*ldc : i*ldc+n] + f64.AxpyUnitaryTo(ctmp, atmp, b[k*ldb:k*ldb+n], ctmp) + } + } + return + } + if isUpper { + for i := 0; i < m; i++ { + for j := n - 1; j >= 0; j-- { + tmp := alpha * b[i*ldb+j] + var tmp2 float64 + atmp := a[j*lda+j+1 : j*lda+n] + btmp := b[i*ldb+j+1 : i*ldb+n] + ctmp := c[i*ldc+j+1 : i*ldc+n] + for k, v := range atmp { + ctmp[k] += tmp * v + tmp2 += btmp[k] * v + } + c[i*ldc+j] *= beta + c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2 + } + } + return + } + for i := 0; i < m; i++ { + for j := 0; j < n; j++ { + tmp := alpha * b[i*ldb+j] + var tmp2 float64 + atmp := a[j*lda : j*lda+j] + btmp := b[i*ldb : i*ldb+j] + ctmp := c[i*ldc : i*ldc+j] + for k, v := range atmp { + ctmp[k] += tmp * v + tmp2 += btmp[k] * v + } + c[i*ldc+j] *= beta + c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2 + } + } +} + +// Dsyrk performs the symmetric rank-k operation +// C = alpha * A * A^T + beta*C +// C is an n×n symmetric matrix. A is an n×k matrix if tA == blas.NoTrans, and +// a k×n matrix otherwise. alpha and beta are scalars. +func (Implementation) Dsyrk(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float64, a []float64, lda int, beta float64, c []float64, ldc int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans { + panic(badTranspose) + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + if ldc < n { + panic(badLdC) + } + var row, col int + if tA == blas.NoTrans { + row, col = n, k + } else { + row, col = k, n + } + if lda*(row-1)+col > len(a) || lda < max(1, col) { + panic(badLdA) + } + if ldc*(n-1)+n > len(c) || ldc < max(1, n) { + panic(badLdC) + } + if alpha == 0 { + if beta == 0 { + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + for j := range ctmp { + ctmp[j] = 0 + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + for j := range ctmp { + ctmp[j] = 0 + } + } + return + } + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + for j := range ctmp { + ctmp[j] *= beta + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + for j := range ctmp { + ctmp[j] *= beta + } + } + return + } + if tA == blas.NoTrans { + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + atmp := a[i*lda : i*lda+k] + for jc, vc := range ctmp { + j := jc + i + ctmp[jc] = vc*beta + alpha*f64.DotUnitary(atmp, a[j*lda:j*lda+k]) + } + } + return + } + for i := 0; i < n; i++ { + atmp := a[i*lda : i*lda+k] + for j, vc := range c[i*ldc : i*ldc+i+1] { + c[i*ldc+j] = vc*beta + alpha*f64.DotUnitary(a[j*lda:j*lda+k], atmp) + } + } + return + } + // Cases where a is transposed. + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + if beta != 1 { + for j := range ctmp { + ctmp[j] *= beta + } + } + for l := 0; l < k; l++ { + tmp := alpha * a[l*lda+i] + if tmp != 0 { + f64.AxpyUnitaryTo(ctmp, tmp, a[l*lda+i:l*lda+n], ctmp) + } + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + if beta != 0 { + for j := range ctmp { + ctmp[j] *= beta + } + } + for l := 0; l < k; l++ { + tmp := alpha * a[l*lda+i] + if tmp != 0 { + f64.AxpyUnitaryTo(ctmp, tmp, a[l*lda:l*lda+i+1], ctmp) + } + } + } +} + +// Dsyr2k performs the symmetric rank 2k operation +// C = alpha * A * B^T + alpha * B * A^T + beta * C +// where C is an n×n symmetric matrix. A and B are n×k matrices if +// tA == NoTrans and k×n otherwise. alpha and beta are scalars. +func (Implementation) Dsyr2k(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) { + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans { + panic(badTranspose) + } + if n < 0 { + panic(nLT0) + } + if k < 0 { + panic(kLT0) + } + if ldc < n { + panic(badLdC) + } + var row, col int + if tA == blas.NoTrans { + row, col = n, k + } else { + row, col = k, n + } + if lda*(row-1)+col > len(a) || lda < max(1, col) { + panic(badLdA) + } + if ldb*(row-1)+col > len(b) || ldb < max(1, col) { + panic(badLdB) + } + if ldc*(n-1)+n > len(c) || ldc < max(1, n) { + panic(badLdC) + } + if alpha == 0 { + if beta == 0 { + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + for j := range ctmp { + ctmp[j] = 0 + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + for j := range ctmp { + ctmp[j] = 0 + } + } + return + } + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + for j := range ctmp { + ctmp[j] *= beta + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + for j := range ctmp { + ctmp[j] *= beta + } + } + return + } + if tA == blas.NoTrans { + if ul == blas.Upper { + for i := 0; i < n; i++ { + atmp := a[i*lda : i*lda+k] + btmp := b[i*ldb : i*ldb+k] + ctmp := c[i*ldc+i : i*ldc+n] + for jc := range ctmp { + j := i + jc + var tmp1, tmp2 float64 + binner := b[j*ldb : j*ldb+k] + for l, v := range a[j*lda : j*lda+k] { + tmp1 += v * btmp[l] + tmp2 += atmp[l] * binner[l] + } + ctmp[jc] *= beta + ctmp[jc] += alpha * (tmp1 + tmp2) + } + } + return + } + for i := 0; i < n; i++ { + atmp := a[i*lda : i*lda+k] + btmp := b[i*ldb : i*ldb+k] + ctmp := c[i*ldc : i*ldc+i+1] + for j := 0; j <= i; j++ { + var tmp1, tmp2 float64 + binner := b[j*ldb : j*ldb+k] + for l, v := range a[j*lda : j*lda+k] { + tmp1 += v * btmp[l] + tmp2 += atmp[l] * binner[l] + } + ctmp[j] *= beta + ctmp[j] += alpha * (tmp1 + tmp2) + } + } + return + } + if ul == blas.Upper { + for i := 0; i < n; i++ { + ctmp := c[i*ldc+i : i*ldc+n] + if beta != 1 { + for j := range ctmp { + ctmp[j] *= beta + } + } + for l := 0; l < k; l++ { + tmp1 := alpha * b[l*lda+i] + tmp2 := alpha * a[l*lda+i] + btmp := b[l*ldb+i : l*ldb+n] + if tmp1 != 0 || tmp2 != 0 { + for j, v := range a[l*lda+i : l*lda+n] { + ctmp[j] += v*tmp1 + btmp[j]*tmp2 + } + } + } + } + return + } + for i := 0; i < n; i++ { + ctmp := c[i*ldc : i*ldc+i+1] + if beta != 1 { + for j := range ctmp { + ctmp[j] *= beta + } + } + for l := 0; l < k; l++ { + tmp1 := alpha * b[l*lda+i] + tmp2 := alpha * a[l*lda+i] + btmp := b[l*ldb : l*ldb+i+1] + if tmp1 != 0 || tmp2 != 0 { + for j, v := range a[l*lda : l*lda+i+1] { + ctmp[j] += v*tmp1 + btmp[j]*tmp2 + } + } + } + } +} + +// Dtrmm performs +// B = alpha * A * B, if tA == blas.NoTrans and side == blas.Left, +// B = alpha * A^T * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Left, +// B = alpha * B * A, if tA == blas.NoTrans and side == blas.Right, +// B = alpha * B * A^T, if tA == blas.Trans or blas.ConjTrans, and side == blas.Right, +// where A is an n×n or m×m triangular matrix, and B is an m×n matrix. +func (Implementation) Dtrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) { + if s != blas.Left && s != blas.Right { + panic(badSide) + } + if ul != blas.Lower && ul != blas.Upper { + panic(badUplo) + } + if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { + panic(badTranspose) + } + if d != blas.NonUnit && d != blas.Unit { + panic(badDiag) + } + if m < 0 { + panic(mLT0) + } + if n < 0 { + panic(nLT0) + } + var k int + if s == blas.Left { + k = m + } else { + k = n + } + if lda*(k-1)+k > len(a) || lda < max(1, k) { + panic(badLdA) + } + if ldb*(m-1)+n > len(b) || ldb < max(1, n) { + panic(badLdB) + } + if alpha == 0 { + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for j := range btmp { + btmp[j] = 0 + } + } + return + } + + nonUnit := d == blas.NonUnit + if s == blas.Left { + if tA == blas.NoTrans { + if ul == blas.Upper { + for i := 0; i < m; i++ { + tmp := alpha + if nonUnit { + tmp *= a[i*lda+i] + } + btmp := b[i*ldb : i*ldb+n] + for j := range btmp { + btmp[j] *= tmp + } + for ka, va := range a[i*lda+i+1 : i*lda+m] { + k := ka + i + 1 + tmp := alpha * va + if tmp != 0 { + f64.AxpyUnitaryTo(btmp, tmp, b[k*ldb:k*ldb+n], btmp) + } + } + } + return + } + for i := m - 1; i >= 0; i-- { + tmp := alpha + if nonUnit { + tmp *= a[i*lda+i] + } + btmp := b[i*ldb : i*ldb+n] + for j := range btmp { + btmp[j] *= tmp + } + for k, va := range a[i*lda : i*lda+i] { + tmp := alpha * va + if tmp != 0 { + f64.AxpyUnitaryTo(btmp, tmp, b[k*ldb:k*ldb+n], btmp) + } + } + } + return + } + // Cases where a is transposed. + if ul == blas.Upper { + for k := m - 1; k >= 0; k-- { + btmpk := b[k*ldb : k*ldb+n] + for ia, va := range a[k*lda+k+1 : k*lda+m] { + i := ia + k + 1 + btmp := b[i*ldb : i*ldb+n] + tmp := alpha * va + if tmp != 0 { + f64.AxpyUnitaryTo(btmp, tmp, btmpk, btmp) + } + } + tmp := alpha + if nonUnit { + tmp *= a[k*lda+k] + } + if tmp != 1 { + for j := 0; j < n; j++ { + btmpk[j] *= tmp + } + } + } + return + } + for k := 0; k < m; k++ { + btmpk := b[k*ldb : k*ldb+n] + for i, va := range a[k*lda : k*lda+k] { + btmp := b[i*ldb : i*ldb+n] + tmp := alpha * va + if tmp != 0 { + f64.AxpyUnitaryTo(btmp, tmp, btmpk, btmp) + } + } + tmp := alpha + if nonUnit { + tmp *= a[k*lda+k] + } + if tmp != 1 { + for j := 0; j < n; j++ { + btmpk[j] *= tmp + } + } + } + return + } + // Cases where a is on the right + if tA == blas.NoTrans { + if ul == blas.Upper { + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for k := n - 1; k >= 0; k-- { + tmp := alpha * btmp[k] + if tmp != 0 { + btmp[k] = tmp + if nonUnit { + btmp[k] *= a[k*lda+k] + } + for ja, v := range a[k*lda+k+1 : k*lda+n] { + j := ja + k + 1 + btmp[j] += tmp * v + } + } + } + } + return + } + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for k := 0; k < n; k++ { + tmp := alpha * btmp[k] + if tmp != 0 { + btmp[k] = tmp + if nonUnit { + btmp[k] *= a[k*lda+k] + } + f64.AxpyUnitaryTo(btmp, tmp, a[k*lda:k*lda+k], btmp) + } + } + } + return + } + // Cases where a is transposed. + if ul == blas.Upper { + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for j, vb := range btmp { + tmp := vb + if nonUnit { + tmp *= a[j*lda+j] + } + tmp += f64.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:n]) + btmp[j] = alpha * tmp + } + } + return + } + for i := 0; i < m; i++ { + btmp := b[i*ldb : i*ldb+n] + for j := n - 1; j >= 0; j-- { + tmp := btmp[j] + if nonUnit { + tmp *= a[j*lda+j] + } + tmp += f64.DotUnitary(a[j*lda:j*lda+j], btmp[:j]) + btmp[j] = alpha * tmp + } + } +}