OSDN Git Service

Hulk did something
[bytom/vapor.git] / vendor / gonum.org / v1 / gonum / blas / gonum / level2cmplx128.go
diff --git a/vendor/gonum.org/v1/gonum/blas/gonum/level2cmplx128.go b/vendor/gonum.org/v1/gonum/blas/gonum/level2cmplx128.go
new file mode 100644 (file)
index 0000000..321dcc8
--- /dev/null
@@ -0,0 +1,1542 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+       "math/cmplx"
+
+       "gonum.org/v1/gonum/blas"
+       "gonum.org/v1/gonum/internal/asm/c128"
+)
+
+// Zgemv performs one of the matrix-vector operations
+//  y = alpha * A * x + beta * y    if trans = blas.NoTrans
+//  y = alpha * A^T * x + beta * y  if trans = blas.Trans
+//  y = alpha * A^H * x + beta * y  if trans = blas.ConjTrans
+// where alpha and beta are scalars, x and y are vectors, and A is an m×n dense matrix.
+func (Implementation) Zgemv(trans blas.Transpose, m, n int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) {
+       checkZMatrix('A', m, n, a, lda)
+       switch trans {
+       default:
+               panic(badTranspose)
+       case blas.NoTrans:
+               checkZVector('x', n, x, incX)
+               checkZVector('y', m, y, incY)
+       case blas.Trans, blas.ConjTrans:
+               checkZVector('x', m, x, incX)
+               checkZVector('y', n, y, incY)
+       }
+
+       if m == 0 || n == 0 || (alpha == 0 && beta == 1) {
+               return
+       }
+
+       var lenX, lenY int
+       if trans == blas.NoTrans {
+               lenX = n
+               lenY = m
+       } else {
+               lenX = m
+               lenY = n
+       }
+       var kx int
+       if incX < 0 {
+               kx = (1 - lenX) * incX
+       }
+       var ky int
+       if incY < 0 {
+               ky = (1 - lenY) * incY
+       }
+
+       // Form y = beta*y.
+       if beta != 1 {
+               if incY == 1 {
+                       if beta == 0 {
+                               for i := range y[:lenY] {
+                                       y[i] = 0
+                               }
+                       } else {
+                               c128.ScalUnitary(beta, y[:lenY])
+                       }
+               } else {
+                       iy := ky
+                       if beta == 0 {
+                               for i := 0; i < lenY; i++ {
+                                       y[iy] = 0
+                                       iy += incY
+                               }
+                       } else {
+                               if incY > 0 {
+                                       c128.ScalInc(beta, y, uintptr(lenY), uintptr(incY))
+                               } else {
+                                       c128.ScalInc(beta, y, uintptr(lenY), uintptr(-incY))
+                               }
+                       }
+               }
+       }
+
+       if alpha == 0 {
+               return
+       }
+
+       switch trans {
+       default:
+               // Form y = alpha*A*x + y.
+               iy := ky
+               if incX == 1 {
+                       for i := 0; i < m; i++ {
+                               y[iy] += alpha * c128.DotuUnitary(a[i*lda:i*lda+n], x[:n])
+                               iy += incY
+                       }
+                       return
+               }
+               for i := 0; i < m; i++ {
+                       y[iy] += alpha * c128.DotuInc(a[i*lda:i*lda+n], x, uintptr(n), 1, uintptr(incX), 0, uintptr(kx))
+                       iy += incY
+               }
+               return
+
+       case blas.Trans:
+               // Form y = alpha*A^T*x + y.
+               ix := kx
+               if incY == 1 {
+                       for i := 0; i < m; i++ {
+                               c128.AxpyUnitary(alpha*x[ix], a[i*lda:i*lda+n], y[:n])
+                               ix += incX
+                       }
+                       return
+               }
+               for i := 0; i < m; i++ {
+                       c128.AxpyInc(alpha*x[ix], a[i*lda:i*lda+n], y, uintptr(n), 1, uintptr(incY), 0, uintptr(ky))
+                       ix += incX
+               }
+               return
+
+       case blas.ConjTrans:
+               // Form y = alpha*A^H*x + y.
+               ix := kx
+               if incY == 1 {
+                       for i := 0; i < m; i++ {
+                               tmp := alpha * x[ix]
+                               for j := 0; j < n; j++ {
+                                       y[j] += tmp * cmplx.Conj(a[i*lda+j])
+                               }
+                               ix += incX
+                       }
+                       return
+               }
+               for i := 0; i < m; i++ {
+                       tmp := alpha * x[ix]
+                       jy := ky
+                       for j := 0; j < n; j++ {
+                               y[jy] += tmp * cmplx.Conj(a[i*lda+j])
+                               jy += incY
+                       }
+                       ix += incX
+               }
+               return
+       }
+}
+
+// Zgerc performs the rank-one operation
+//  A += alpha * x * y^H
+// where A is an m×n dense matrix, alpha is a scalar, x is an m element vector,
+// and y is an n element vector.
+func (Implementation) Zgerc(m, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int) {
+       checkZMatrix('A', m, n, a, lda)
+       checkZVector('x', m, x, incX)
+       checkZVector('y', n, y, incY)
+
+       if m == 0 || n == 0 || alpha == 0 {
+               return
+       }
+
+       var kx, jy int
+       if incX < 0 {
+               kx = (1 - m) * incX
+       }
+       if incY < 0 {
+               jy = (1 - n) * incY
+       }
+       for j := 0; j < n; j++ {
+               if y[jy] != 0 {
+                       tmp := alpha * cmplx.Conj(y[jy])
+                       c128.AxpyInc(tmp, x, a[j:], uintptr(m), uintptr(incX), uintptr(lda), uintptr(kx), 0)
+               }
+               jy += incY
+       }
+}
+
+// Zgeru performs the rank-one operation
+//  A += alpha * x * y^T
+// where A is an m×n dense matrix, alpha is a scalar, x is an m element vector,
+// and y is an n element vector.
+func (Implementation) Zgeru(m, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int) {
+       checkZMatrix('A', m, n, a, lda)
+       checkZVector('x', m, x, incX)
+       checkZVector('y', n, y, incY)
+
+       if m == 0 || n == 0 || alpha == 0 {
+               return
+       }
+
+       var kx int
+       if incX < 0 {
+               kx = (1 - m) * incX
+       }
+       if incY == 1 {
+               for i := 0; i < m; i++ {
+                       if x[kx] != 0 {
+                               tmp := alpha * x[kx]
+                               c128.AxpyUnitary(tmp, y[:n], a[i*lda:i*lda+n])
+                       }
+                       kx += incX
+               }
+               return
+       }
+       var jy int
+       if incY < 0 {
+               jy = (1 - n) * incY
+       }
+       for i := 0; i < m; i++ {
+               if x[kx] != 0 {
+                       tmp := alpha * x[kx]
+                       c128.AxpyInc(tmp, y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(jy), 0)
+               }
+               kx += incX
+       }
+}
+
+// Zhemv performs the matrix-vector operation
+//  y = alpha * A * x + beta * y
+// where alpha and beta are scalars, x and y are vectors, and A is an n×n
+// Hermitian matrix. The imaginary parts of the diagonal elements of A are
+// ignored and assumed to be zero.
+func (Implementation) Zhemv(uplo blas.Uplo, n int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int) {
+       if uplo != blas.Upper && uplo != blas.Lower {
+               panic(badUplo)
+       }
+       checkZMatrix('A', n, n, a, lda)
+       checkZVector('x', n, x, incX)
+       checkZVector('y', n, y, incY)
+
+       if n == 0 || (alpha == 0 && beta == 1) {
+               return
+       }
+
+       // Set up the start indices in X and Y.
+       var kx int
+       if incX < 0 {
+               kx = (1 - n) * incX
+       }
+       var ky int
+       if incY < 0 {
+               ky = (1 - n) * incY
+       }
+
+       // Form y = beta*y.
+       if beta != 1 {
+               if incY == 1 {
+                       if beta == 0 {
+                               for i := range y[:n] {
+                                       y[i] = 0
+                               }
+                       } else {
+                               for i, v := range y[:n] {
+                                       y[i] = beta * v
+                               }
+                       }
+               } else {
+                       iy := ky
+                       if beta == 0 {
+                               for i := 0; i < n; i++ {
+                                       y[iy] = 0
+                                       iy += incY
+                               }
+                       } else {
+                               for i := 0; i < n; i++ {
+                                       y[iy] = beta * y[iy]
+                                       iy += incY
+                               }
+                       }
+               }
+       }
+
+       if alpha == 0 {
+               return
+       }
+
+       // The elements of A are accessed sequentially with one pass through
+       // the triangular part of A.
+
+       if uplo == blas.Upper {
+               // Form y when A is stored in upper triangle.
+               if incX == 1 && incY == 1 {
+                       for i := 0; i < n; i++ {
+                               tmp1 := alpha * x[i]
+                               var tmp2 complex128
+                               for j := i + 1; j < n; j++ {
+                                       y[j] += tmp1 * cmplx.Conj(a[i*lda+j])
+                                       tmp2 += a[i*lda+j] * x[j]
+                               }
+                               aii := complex(real(a[i*lda+i]), 0)
+                               y[i] += tmp1*aii + alpha*tmp2
+                       }
+               } else {
+                       ix := kx
+                       iy := ky
+                       for i := 0; i < n; i++ {
+                               tmp1 := alpha * x[ix]
+                               var tmp2 complex128
+                               jx := ix
+                               jy := iy
+                               for j := i + 1; j < n; j++ {
+                                       jx += incX
+                                       jy += incY
+                                       y[jy] += tmp1 * cmplx.Conj(a[i*lda+j])
+                                       tmp2 += a[i*lda+j] * x[jx]
+                               }
+                               aii := complex(real(a[i*lda+i]), 0)
+                               y[iy] += tmp1*aii + alpha*tmp2
+                               ix += incX
+                               iy += incY
+                       }
+               }
+               return
+       }
+
+       // Form y when A is stored in lower triangle.
+       if incX == 1 && incY == 1 {
+               for i := 0; i < n; i++ {
+                       tmp1 := alpha * x[i]
+                       var tmp2 complex128
+                       for j := 0; j < i; j++ {
+                               y[j] += tmp1 * cmplx.Conj(a[i*lda+j])
+                               tmp2 += a[i*lda+j] * x[j]
+                       }
+                       aii := complex(real(a[i*lda+i]), 0)
+                       y[i] += tmp1*aii + alpha*tmp2
+               }
+       } else {
+               ix := kx
+               iy := ky
+               for i := 0; i < n; i++ {
+                       tmp1 := alpha * x[ix]
+                       var tmp2 complex128
+                       jx := kx
+                       jy := ky
+                       for j := 0; j < i; j++ {
+                               y[jy] += tmp1 * cmplx.Conj(a[i*lda+j])
+                               tmp2 += a[i*lda+j] * x[jx]
+                               jx += incX
+                               jy += incY
+                       }
+                       aii := complex(real(a[i*lda+i]), 0)
+                       y[iy] += tmp1*aii + alpha*tmp2
+                       ix += incX
+                       iy += incY
+               }
+       }
+}
+
+// Zher performs the Hermitian rank-one operation
+//  A += alpha * x * x^H
+// where A is an n×n Hermitian matrix, alpha is a real scalar, and x is an n
+// element vector. On entry, the imaginary parts of the diagonal elements of A
+// are ignored and assumed to be zero, on return they will be set to zero.
+func (Implementation) Zher(uplo blas.Uplo, n int, alpha float64, x []complex128, incX int, a []complex128, lda int) {
+       if uplo != blas.Upper && uplo != blas.Lower {
+               panic(badUplo)
+       }
+       checkZMatrix('A', n, n, a, lda)
+       checkZVector('x', n, x, incX)
+
+       if n == 0 || alpha == 0 {
+               return
+       }
+
+       var kx int
+       if incX < 0 {
+               kx = (1 - n) * incX
+       }
+       if uplo == blas.Upper {
+               if incX == 1 {
+                       for i := 0; i < n; i++ {
+                               if x[i] != 0 {
+                                       tmp := complex(alpha*real(x[i]), alpha*imag(x[i]))
+                                       aii := real(a[i*lda+i])
+                                       xtmp := real(tmp * cmplx.Conj(x[i]))
+                                       a[i*lda+i] = complex(aii+xtmp, 0)
+                                       for j := i + 1; j < n; j++ {
+                                               a[i*lda+j] += tmp * cmplx.Conj(x[j])
+                                       }
+                               } else {
+                                       aii := real(a[i*lda+i])
+                                       a[i*lda+i] = complex(aii, 0)
+                               }
+                       }
+                       return
+               }
+
+               ix := kx
+               for i := 0; i < n; i++ {
+                       if x[ix] != 0 {
+                               tmp := complex(alpha*real(x[ix]), alpha*imag(x[ix]))
+                               aii := real(a[i*lda+i])
+                               xtmp := real(tmp * cmplx.Conj(x[ix]))
+                               a[i*lda+i] = complex(aii+xtmp, 0)
+                               jx := ix + incX
+                               for j := i + 1; j < n; j++ {
+                                       a[i*lda+j] += tmp * cmplx.Conj(x[jx])
+                                       jx += incX
+                               }
+                       } else {
+                               aii := real(a[i*lda+i])
+                               a[i*lda+i] = complex(aii, 0)
+                       }
+                       ix += incX
+               }
+               return
+       }
+
+       if incX == 1 {
+               for i := 0; i < n; i++ {
+                       if x[i] != 0 {
+                               tmp := complex(alpha*real(x[i]), alpha*imag(x[i]))
+                               for j := 0; j < i; j++ {
+                                       a[i*lda+j] += tmp * cmplx.Conj(x[j])
+                               }
+                               aii := real(a[i*lda+i])
+                               xtmp := real(tmp * cmplx.Conj(x[i]))
+                               a[i*lda+i] = complex(aii+xtmp, 0)
+                       } else {
+                               aii := real(a[i*lda+i])
+                               a[i*lda+i] = complex(aii, 0)
+                       }
+               }
+               return
+       }
+
+       ix := kx
+       for i := 0; i < n; i++ {
+               if x[ix] != 0 {
+                       tmp := complex(alpha*real(x[ix]), alpha*imag(x[ix]))
+                       jx := kx
+                       for j := 0; j < i; j++ {
+                               a[i*lda+j] += tmp * cmplx.Conj(x[jx])
+                               jx += incX
+                       }
+                       aii := real(a[i*lda+i])
+                       xtmp := real(tmp * cmplx.Conj(x[ix]))
+                       a[i*lda+i] = complex(aii+xtmp, 0)
+
+               } else {
+                       aii := real(a[i*lda+i])
+                       a[i*lda+i] = complex(aii, 0)
+               }
+               ix += incX
+       }
+}
+
+// Zher2 performs the Hermitian rank-two operation
+//  A += alpha*x*y^H + conj(alpha)*y*x^H
+// where alpha is a scalar, x and y are n element vectors and A is an n×n
+// Hermitian matrix. On entry, the imaginary parts of the diagonal elements are
+// ignored and assumed to be zero. On return they will be set to zero.
+func (Implementation) Zher2(uplo blas.Uplo, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int) {
+       if uplo != blas.Upper && uplo != blas.Lower {
+               panic(badUplo)
+       }
+       checkZMatrix('A', n, n, a, lda)
+       checkZVector('x', n, x, incX)
+       checkZVector('y', n, y, incY)
+
+       if n == 0 || alpha == 0 {
+               return
+       }
+
+       var kx, ky int
+       var ix, iy int
+       if incX != 1 || incY != 1 {
+               if incX < 0 {
+                       kx = (1 - n) * incX
+               }
+               if incY < 0 {
+                       ky = (1 - n) * incY
+               }
+               ix = kx
+               iy = ky
+       }
+       if uplo == blas.Upper {
+               if incX == 1 && incY == 1 {
+                       for i := 0; i < n; i++ {
+                               if x[i] != 0 || y[i] != 0 {
+                                       tmp1 := alpha * x[i]
+                                       tmp2 := cmplx.Conj(alpha) * y[i]
+                                       aii := real(a[i*lda+i]) + real(tmp1*cmplx.Conj(y[i])) + real(tmp2*cmplx.Conj(x[i]))
+                                       a[i*lda+i] = complex(aii, 0)
+                                       for j := i + 1; j < n; j++ {
+                                               a[i*lda+j] += tmp1*cmplx.Conj(y[j]) + tmp2*cmplx.Conj(x[j])
+                                       }
+                               } else {
+                                       aii := real(a[i*lda+i])
+                                       a[i*lda+i] = complex(aii, 0)
+                               }
+                       }
+                       return
+               }
+               for i := 0; i < n; i++ {
+                       if x[ix] != 0 || y[iy] != 0 {
+                               tmp1 := alpha * x[ix]
+                               tmp2 := cmplx.Conj(alpha) * y[iy]
+                               aii := real(a[i*lda+i]) + real(tmp1*cmplx.Conj(y[iy])) + real(tmp2*cmplx.Conj(x[ix]))
+                               a[i*lda+i] = complex(aii, 0)
+                               jx := ix + incX
+                               jy := iy + incY
+                               for j := i + 1; j < n; j++ {
+                                       a[i*lda+j] += tmp1*cmplx.Conj(y[jy]) + tmp2*cmplx.Conj(x[jx])
+                                       jx += incX
+                                       jy += incY
+                               }
+                       } else {
+                               aii := real(a[i*lda+i])
+                               a[i*lda+i] = complex(aii, 0)
+                       }
+                       ix += incX
+                       iy += incY
+               }
+               return
+       }
+
+       if incX == 1 && incY == 1 {
+               for i := 0; i < n; i++ {
+                       if x[i] != 0 || y[i] != 0 {
+                               tmp1 := alpha * x[i]
+                               tmp2 := cmplx.Conj(alpha) * y[i]
+                               for j := 0; j < i; j++ {
+                                       a[i*lda+j] += tmp1*cmplx.Conj(y[j]) + tmp2*cmplx.Conj(x[j])
+                               }
+                               aii := real(a[i*lda+i]) + real(tmp1*cmplx.Conj(y[i])) + real(tmp2*cmplx.Conj(x[i]))
+                               a[i*lda+i] = complex(aii, 0)
+                       } else {
+                               aii := real(a[i*lda+i])
+                               a[i*lda+i] = complex(aii, 0)
+                       }
+               }
+               return
+       }
+       for i := 0; i < n; i++ {
+               if x[ix] != 0 || y[iy] != 0 {
+                       tmp1 := alpha * x[ix]
+                       tmp2 := cmplx.Conj(alpha) * y[iy]
+                       jx := kx
+                       jy := ky
+                       for j := 0; j < i; j++ {
+                               a[i*lda+j] += tmp1*cmplx.Conj(y[jy]) + tmp2*cmplx.Conj(x[jx])
+                               jx += incX
+                               jy += incY
+                       }
+                       aii := real(a[i*lda+i]) + real(tmp1*cmplx.Conj(y[iy])) + real(tmp2*cmplx.Conj(x[ix]))
+                       a[i*lda+i] = complex(aii, 0)
+               } else {
+                       aii := real(a[i*lda+i])
+                       a[i*lda+i] = complex(aii, 0)
+               }
+               ix += incX
+               iy += incY
+       }
+}
+
+// Zhpmv performs the matrix-vector operation
+//  y = alpha * A * x + beta * y
+// where alpha and beta are scalars, x and y are vectors, and A is an n×n
+// Hermitian matrix in packed form. The imaginary parts of the diagonal
+// elements of A are ignored and assumed to be zero.
+func (Implementation) Zhpmv(uplo blas.Uplo, n int, alpha complex128, ap []complex128, x []complex128, incX int, beta complex128, y []complex128, incY int) {
+       if uplo != blas.Upper && uplo != blas.Lower {
+               panic(badUplo)
+       }
+       checkZVector('x', n, x, incX)
+       checkZVector('y', n, y, incY)
+       if len(ap) < n*(n+1)/2 {
+               panic("blas: insufficient A packed matrix slice length")
+       }
+
+       if n == 0 || (alpha == 0 && beta == 1) {
+               return
+       }
+
+       // Set up the start indices in X and Y.
+       var kx int
+       if incX < 0 {
+               kx = (1 - n) * incX
+       }
+       var ky int
+       if incY < 0 {
+               ky = (1 - n) * incY
+       }
+
+       // Form y = beta*y.
+       if beta != 1 {
+               if incY == 1 {
+                       if beta == 0 {
+                               for i := range y[:n] {
+                                       y[i] = 0
+                               }
+                       } else {
+                               for i, v := range y[:n] {
+                                       y[i] = beta * v
+                               }
+                       }
+               } else {
+                       iy := ky
+                       if beta == 0 {
+                               for i := 0; i < n; i++ {
+                                       y[iy] = 0
+                                       iy += incY
+                               }
+                       } else {
+                               for i := 0; i < n; i++ {
+                                       y[iy] *= beta
+                                       iy += incY
+                               }
+                       }
+               }
+       }
+
+       if alpha == 0 {
+               return
+       }
+
+       // The elements of A are accessed sequentially with one pass through ap.
+
+       var kk int
+       if uplo == blas.Upper {
+               // Form y when ap contains the upper triangle.
+               // Here, kk points to the current diagonal element in ap.
+               if incX == 1 && incY == 1 {
+                       for i := 0; i < n; i++ {
+                               tmp1 := alpha * x[i]
+                               y[i] += tmp1 * complex(real(ap[kk]), 0)
+                               var tmp2 complex128
+                               k := kk + 1
+                               for j := i + 1; j < n; j++ {
+                                       y[j] += tmp1 * cmplx.Conj(ap[k])
+                                       tmp2 += ap[k] * x[j]
+                                       k++
+                               }
+                               y[i] += alpha * tmp2
+                               kk += n - i
+                       }
+               } else {
+                       ix := kx
+                       iy := ky
+                       for i := 0; i < n; i++ {
+                               tmp1 := alpha * x[ix]
+                               y[iy] += tmp1 * complex(real(ap[kk]), 0)
+                               var tmp2 complex128
+                               jx := ix
+                               jy := iy
+                               for k := kk + 1; k < kk+n-i; k++ {
+                                       jx += incX
+                                       jy += incY
+                                       y[jy] += tmp1 * cmplx.Conj(ap[k])
+                                       tmp2 += ap[k] * x[jx]
+                               }
+                               y[iy] += alpha * tmp2
+                               ix += incX
+                               iy += incY
+                               kk += n - i
+                       }
+               }
+               return
+       }
+
+       // Form y when ap contains the lower triangle.
+       // Here, kk points to the beginning of current row in ap.
+       if incX == 1 && incY == 1 {
+               for i := 0; i < n; i++ {
+                       tmp1 := alpha * x[i]
+                       var tmp2 complex128
+                       k := kk
+                       for j := 0; j < i; j++ {
+                               y[j] += tmp1 * cmplx.Conj(ap[k])
+                               tmp2 += ap[k] * x[j]
+                               k++
+                       }
+                       aii := complex(real(ap[kk+i]), 0)
+                       y[i] += tmp1*aii + alpha*tmp2
+                       kk += i + 1
+               }
+       } else {
+               ix := kx
+               iy := ky
+               for i := 0; i < n; i++ {
+                       tmp1 := alpha * x[ix]
+                       var tmp2 complex128
+                       jx := kx
+                       jy := ky
+                       for k := kk; k < kk+i; k++ {
+                               y[jy] += tmp1 * cmplx.Conj(ap[k])
+                               tmp2 += ap[k] * x[jx]
+                               jx += incX
+                               jy += incY
+                       }
+                       aii := complex(real(ap[kk+i]), 0)
+                       y[iy] += tmp1*aii + alpha*tmp2
+                       ix += incX
+                       iy += incY
+                       kk += i + 1
+               }
+       }
+}
+
+// Zhpr performs the Hermitian rank-1 operation
+//  A += alpha * x * x^H,
+// where alpha is a real scalar, x is a vector, and A is an n×n hermitian matrix
+// in packed form. On entry, the imaginary parts of the diagonal elements are
+// assumed to be zero, and on return they are set to zero.
+func (Implementation) Zhpr(uplo blas.Uplo, n int, alpha float64, x []complex128, incX int, ap []complex128) {
+       if uplo != blas.Upper && uplo != blas.Lower {
+               panic(badUplo)
+       }
+       if n < 0 {
+               panic(nLT0)
+       }
+       checkZVector('x', n, x, incX)
+       if len(ap) < n*(n+1)/2 {
+               panic("blas: insufficient A packed matrix slice length")
+       }
+
+       if n == 0 || alpha == 0 {
+               return
+       }
+
+       // Set up start index in X.
+       var kx int
+       if incX < 0 {
+               kx = (1 - n) * incX
+       }
+
+       // The elements of A are accessed sequentially with one pass through ap.
+
+       var kk int
+       if uplo == blas.Upper {
+               // Form A when upper triangle is stored in AP.
+               // Here, kk points to the current diagonal element in ap.
+               if incX == 1 {
+                       for i := 0; i < n; i++ {
+                               xi := x[i]
+                               if xi != 0 {
+                                       aii := real(ap[kk]) + alpha*real(cmplx.Conj(xi)*xi)
+                                       ap[kk] = complex(aii, 0)
+
+                                       tmp := complex(alpha, 0) * xi
+                                       a := ap[kk+1 : kk+n-i]
+                                       x := x[i+1 : n]
+                                       for j, v := range x {
+                                               a[j] += tmp * cmplx.Conj(v)
+                                       }
+                               } else {
+                                       ap[kk] = complex(real(ap[kk]), 0)
+                               }
+                               kk += n - i
+                       }
+               } else {
+                       ix := kx
+                       for i := 0; i < n; i++ {
+                               xi := x[ix]
+                               if xi != 0 {
+                                       aii := real(ap[kk]) + alpha*real(cmplx.Conj(xi)*xi)
+                                       ap[kk] = complex(aii, 0)
+
+                                       tmp := complex(alpha, 0) * xi
+                                       jx := ix + incX
+                                       a := ap[kk+1 : kk+n-i]
+                                       for k := range a {
+                                               a[k] += tmp * cmplx.Conj(x[jx])
+                                               jx += incX
+                                       }
+                               } else {
+                                       ap[kk] = complex(real(ap[kk]), 0)
+                               }
+                               ix += incX
+                               kk += n - i
+                       }
+               }
+               return
+       }
+
+       // Form A when lower triangle is stored in AP.
+       // Here, kk points to the beginning of current row in ap.
+       if incX == 1 {
+               for i := 0; i < n; i++ {
+                       xi := x[i]
+                       if xi != 0 {
+                               tmp := complex(alpha, 0) * xi
+                               a := ap[kk : kk+i]
+                               for j, v := range x[:i] {
+                                       a[j] += tmp * cmplx.Conj(v)
+                               }
+
+                               aii := real(ap[kk+i]) + alpha*real(cmplx.Conj(xi)*xi)
+                               ap[kk+i] = complex(aii, 0)
+                       } else {
+                               ap[kk+i] = complex(real(ap[kk+i]), 0)
+                       }
+                       kk += i + 1
+               }
+       } else {
+               ix := kx
+               for i := 0; i < n; i++ {
+                       xi := x[ix]
+                       if xi != 0 {
+                               tmp := complex(alpha, 0) * xi
+                               a := ap[kk : kk+i]
+                               jx := kx
+                               for k := range a {
+                                       a[k] += tmp * cmplx.Conj(x[jx])
+                                       jx += incX
+                               }
+
+                               aii := real(ap[kk+i]) + alpha*real(cmplx.Conj(xi)*xi)
+                               ap[kk+i] = complex(aii, 0)
+                       } else {
+                               ap[kk+i] = complex(real(ap[kk+i]), 0)
+                       }
+                       ix += incX
+                       kk += i + 1
+               }
+       }
+}
+
+// Zhpr2 performs the Hermitian rank-2 operation
+//  A += alpha*x*y^H + conj(alpha)*y*x^H,
+// where alpha is a complex scalar, x and y are n element vectors, and A is an
+// n×n Hermitian matrix, supplied in packed form. On entry, the imaginary parts
+// of the diagonal elements are assumed to be zero, and on return they are set to zero.
+func (Implementation) Zhpr2(uplo blas.Uplo, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, ap []complex128) {
+       if uplo != blas.Upper && uplo != blas.Lower {
+               panic(badUplo)
+       }
+       if n < 0 {
+               panic(nLT0)
+       }
+       checkZVector('x', n, x, incX)
+       checkZVector('y', n, y, incY)
+       if len(ap) < n*(n+1)/2 {
+               panic("blas: insufficient A packed matrix slice length")
+       }
+
+       if n == 0 || alpha == 0 {
+               return
+       }
+
+       // Set up start indices in X and Y.
+       var kx int
+       if incX < 0 {
+               kx = (1 - n) * incX
+       }
+       var ky int
+       if incY < 0 {
+               ky = (1 - n) * incY
+       }
+
+       // The elements of A are accessed sequentially with one pass through ap.
+
+       var kk int
+       if uplo == blas.Upper {
+               // Form A when upper triangle is stored in AP.
+               // Here, kk points to the current diagonal element in ap.
+               if incX == 1 && incY == 1 {
+                       for i := 0; i < n; i++ {
+                               if x[i] != 0 || y[i] != 0 {
+                                       tmp1 := alpha * x[i]
+                                       tmp2 := cmplx.Conj(alpha) * y[i]
+                                       aii := real(ap[kk]) + real(tmp1*cmplx.Conj(y[i])) + real(tmp2*cmplx.Conj(x[i]))
+                                       ap[kk] = complex(aii, 0)
+                                       k := kk + 1
+                                       for j := i + 1; j < n; j++ {
+                                               ap[k] += tmp1*cmplx.Conj(y[j]) + tmp2*cmplx.Conj(x[j])
+                                               k++
+                                       }
+                               } else {
+                                       ap[kk] = complex(real(ap[kk]), 0)
+                               }
+                               kk += n - i
+                       }
+               } else {
+                       ix := kx
+                       iy := ky
+                       for i := 0; i < n; i++ {
+                               if x[ix] != 0 || y[iy] != 0 {
+                                       tmp1 := alpha * x[ix]
+                                       tmp2 := cmplx.Conj(alpha) * y[iy]
+                                       aii := real(ap[kk]) + real(tmp1*cmplx.Conj(y[iy])) + real(tmp2*cmplx.Conj(x[ix]))
+                                       ap[kk] = complex(aii, 0)
+                                       jx := ix + incX
+                                       jy := iy + incY
+                                       for k := kk + 1; k < kk+n-i; k++ {
+                                               ap[k] += tmp1*cmplx.Conj(y[jy]) + tmp2*cmplx.Conj(x[jx])
+                                               jx += incX
+                                               jy += incY
+                                       }
+                               } else {
+                                       ap[kk] = complex(real(ap[kk]), 0)
+                               }
+                               ix += incX
+                               iy += incY
+                               kk += n - i
+                       }
+               }
+               return
+       }
+
+       // Form A when lower triangle is stored in AP.
+       // Here, kk points to the beginning of current row in ap.
+       if incX == 1 && incY == 1 {
+               for i := 0; i < n; i++ {
+                       if x[i] != 0 || y[i] != 0 {
+                               tmp1 := alpha * x[i]
+                               tmp2 := cmplx.Conj(alpha) * y[i]
+                               k := kk
+                               for j := 0; j < i; j++ {
+                                       ap[k] += tmp1*cmplx.Conj(y[j]) + tmp2*cmplx.Conj(x[j])
+                                       k++
+                               }
+                               aii := real(ap[kk+i]) + real(tmp1*cmplx.Conj(y[i])) + real(tmp2*cmplx.Conj(x[i]))
+                               ap[kk+i] = complex(aii, 0)
+                       } else {
+                               ap[kk+i] = complex(real(ap[kk+i]), 0)
+                       }
+                       kk += i + 1
+               }
+       } else {
+               ix := kx
+               iy := ky
+               for i := 0; i < n; i++ {
+                       if x[ix] != 0 || y[iy] != 0 {
+                               tmp1 := alpha * x[ix]
+                               tmp2 := cmplx.Conj(alpha) * y[iy]
+                               jx := kx
+                               jy := ky
+                               for k := kk; k < kk+i; k++ {
+                                       ap[k] += tmp1*cmplx.Conj(y[jy]) + tmp2*cmplx.Conj(x[jx])
+                                       jx += incX
+                                       jy += incY
+                               }
+                               aii := real(ap[kk+i]) + real(tmp1*cmplx.Conj(y[iy])) + real(tmp2*cmplx.Conj(x[ix]))
+                               ap[kk+i] = complex(aii, 0)
+                       } else {
+                               ap[kk+i] = complex(real(ap[kk+i]), 0)
+                       }
+                       ix += incX
+                       iy += incY
+                       kk += i + 1
+               }
+       }
+}
+
+// Ztpmv performs one of the matrix-vector operations
+//  x = A * x    if trans = blas.NoTrans
+//  x = A^T * x  if trans = blas.Trans
+//  x = A^H * x  if trans = blas.ConjTrans
+// where x is an n element vector and A is an n×n triangular matrix, supplied in
+// packed form.
+func (Implementation) Ztpmv(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, n int, ap []complex128, x []complex128, incX int) {
+       if uplo != blas.Upper && uplo != blas.Lower {
+               panic(badUplo)
+       }
+       if trans != blas.NoTrans && trans != blas.Trans && trans != blas.ConjTrans {
+               panic(badTranspose)
+       }
+       if diag != blas.Unit && diag != blas.NonUnit {
+               panic(badDiag)
+       }
+       checkZVector('x', n, x, incX)
+       if len(ap) < n*(n+1)/2 {
+               panic("blas: insufficient A packed matrix slice length")
+       }
+
+       if n == 0 {
+               return
+       }
+
+       // Set up start index in X.
+       var kx int
+       if incX < 0 {
+               kx = (1 - n) * incX
+       }
+
+       // The elements of A are accessed sequentially with one pass through A.
+
+       if trans == blas.NoTrans {
+               // Form x = A*x.
+               if uplo == blas.Upper {
+                       // kk points to the current diagonal element in ap.
+                       kk := 0
+                       if incX == 1 {
+                               x = x[:n]
+                               for i := range x {
+                                       if diag == blas.NonUnit {
+                                               x[i] *= ap[kk]
+                                       }
+                                       if n-i-1 > 0 {
+                                               x[i] += c128.DotuUnitary(ap[kk+1:kk+n-i], x[i+1:])
+                                       }
+                                       kk += n - i
+                               }
+                       } else {
+                               ix := kx
+                               for i := 0; i < n; i++ {
+                                       if diag == blas.NonUnit {
+                                               x[ix] *= ap[kk]
+                                       }
+                                       if n-i-1 > 0 {
+                                               x[ix] += c128.DotuInc(ap[kk+1:kk+n-i], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(ix+incX))
+                                       }
+                                       ix += incX
+                                       kk += n - i
+                               }
+                       }
+               } else {
+                       // kk points to the beginning of current row in ap.
+                       kk := n*(n+1)/2 - n
+                       if incX == 1 {
+                               for i := n - 1; i >= 0; i-- {
+                                       if diag == blas.NonUnit {
+                                               x[i] *= ap[kk+i]
+                                       }
+                                       if i > 0 {
+                                               x[i] += c128.DotuUnitary(ap[kk:kk+i], x[:i])
+                                       }
+                                       kk -= i
+                               }
+                       } else {
+                               ix := kx + (n-1)*incX
+                               for i := n - 1; i >= 0; i-- {
+                                       if diag == blas.NonUnit {
+                                               x[ix] *= ap[kk+i]
+                                       }
+                                       if i > 0 {
+                                               x[ix] += c128.DotuInc(ap[kk:kk+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx))
+                                       }
+                                       ix -= incX
+                                       kk -= i
+                               }
+                       }
+               }
+               return
+       }
+
+       if trans == blas.Trans {
+               // Form x = A^T*x.
+               if uplo == blas.Upper {
+                       // kk points to the current diagonal element in ap.
+                       kk := n*(n+1)/2 - 1
+                       if incX == 1 {
+                               for i := n - 1; i >= 0; i-- {
+                                       xi := x[i]
+                                       if diag == blas.NonUnit {
+                                               x[i] *= ap[kk]
+                                       }
+                                       if n-i-1 > 0 {
+                                               c128.AxpyUnitary(xi, ap[kk+1:kk+n-i], x[i+1:n])
+                                       }
+                                       kk -= n - i + 1
+                               }
+                       } else {
+                               ix := kx + (n-1)*incX
+                               for i := n - 1; i >= 0; i-- {
+                                       xi := x[ix]
+                                       if diag == blas.NonUnit {
+                                               x[ix] *= ap[kk]
+                                       }
+                                       if n-i-1 > 0 {
+                                               c128.AxpyInc(xi, ap[kk+1:kk+n-i], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(ix+incX))
+                                       }
+                                       ix -= incX
+                                       kk -= n - i + 1
+                               }
+                       }
+               } else {
+                       // kk points to the beginning of current row in ap.
+                       kk := 0
+                       if incX == 1 {
+                               x = x[:n]
+                               for i := range x {
+                                       if i > 0 {
+                                               c128.AxpyUnitary(x[i], ap[kk:kk+i], x[:i])
+                                       }
+                                       if diag == blas.NonUnit {
+                                               x[i] *= ap[kk+i]
+                                       }
+                                       kk += i + 1
+                               }
+                       } else {
+                               ix := kx
+                               for i := 0; i < n; i++ {
+                                       if i > 0 {
+                                               c128.AxpyInc(x[ix], ap[kk:kk+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx))
+                                       }
+                                       if diag == blas.NonUnit {
+                                               x[ix] *= ap[kk+i]
+                                       }
+                                       ix += incX
+                                       kk += i + 1
+                               }
+                       }
+               }
+               return
+       }
+
+       // Form x = A^H*x.
+       if uplo == blas.Upper {
+               // kk points to the current diagonal element in ap.
+               kk := n*(n+1)/2 - 1
+               if incX == 1 {
+                       for i := n - 1; i >= 0; i-- {
+                               xi := x[i]
+                               if diag == blas.NonUnit {
+                                       x[i] *= cmplx.Conj(ap[kk])
+                               }
+                               k := kk + 1
+                               for j := i + 1; j < n; j++ {
+                                       x[j] += xi * cmplx.Conj(ap[k])
+                                       k++
+                               }
+                               kk -= n - i + 1
+                       }
+               } else {
+                       ix := kx + (n-1)*incX
+                       for i := n - 1; i >= 0; i-- {
+                               xi := x[ix]
+                               if diag == blas.NonUnit {
+                                       x[ix] *= cmplx.Conj(ap[kk])
+                               }
+                               jx := ix + incX
+                               k := kk + 1
+                               for j := i + 1; j < n; j++ {
+                                       x[jx] += xi * cmplx.Conj(ap[k])
+                                       jx += incX
+                                       k++
+                               }
+                               ix -= incX
+                               kk -= n - i + 1
+                       }
+               }
+       } else {
+               // kk points to the beginning of current row in ap.
+               kk := 0
+               if incX == 1 {
+                       x = x[:n]
+                       for i, xi := range x {
+                               for j := 0; j < i; j++ {
+                                       x[j] += xi * cmplx.Conj(ap[kk+j])
+                               }
+                               if diag == blas.NonUnit {
+                                       x[i] *= cmplx.Conj(ap[kk+i])
+                               }
+                               kk += i + 1
+                       }
+               } else {
+                       ix := kx
+                       for i := 0; i < n; i++ {
+                               xi := x[ix]
+                               jx := kx
+                               for j := 0; j < i; j++ {
+                                       x[jx] += xi * cmplx.Conj(ap[kk+j])
+                                       jx += incX
+                               }
+                               if diag == blas.NonUnit {
+                                       x[ix] *= cmplx.Conj(ap[kk+i])
+                               }
+                               ix += incX
+                               kk += i + 1
+                       }
+               }
+       }
+}
+
+// Ztrmv performs one of the matrix-vector operations
+//  x = A * x    if trans = blas.NoTrans
+//  x = A^T * x  if trans = blas.Trans
+//  x = A^H * x  if trans = blas.ConjTrans
+// where x is a vector, and A is an n×n triangular matrix.
+func (Implementation) Ztrmv(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, n int, a []complex128, lda int, x []complex128, incX int) {
+       if uplo != blas.Upper && uplo != blas.Lower {
+               panic(badUplo)
+       }
+       if trans != blas.NoTrans && trans != blas.Trans && trans != blas.ConjTrans {
+               panic(badTranspose)
+       }
+       if diag != blas.Unit && diag != blas.NonUnit {
+               panic(badDiag)
+       }
+       checkZMatrix('A', n, n, a, lda)
+       checkZVector('x', n, x, incX)
+
+       if n == 0 {
+               return
+       }
+
+       // Set up start index in X.
+       var kx int
+       if incX < 0 {
+               kx = (1 - n) * incX
+       }
+
+       // The elements of A are accessed sequentially with one pass through A.
+
+       if trans == blas.NoTrans {
+               // Form x = A*x.
+               if uplo == blas.Upper {
+                       if incX == 1 {
+                               for i := 0; i < n; i++ {
+                                       if diag == blas.NonUnit {
+                                               x[i] *= a[i*lda+i]
+                                       }
+                                       if n-i-1 > 0 {
+                                               x[i] += c128.DotuUnitary(a[i*lda+i+1:i*lda+n], x[i+1:n])
+                                       }
+                               }
+                       } else {
+                               ix := kx
+                               for i := 0; i < n; i++ {
+                                       if diag == blas.NonUnit {
+                                               x[ix] *= a[i*lda+i]
+                                       }
+                                       if n-i-1 > 0 {
+                                               x[ix] += c128.DotuInc(a[i*lda+i+1:i*lda+n], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(ix+incX))
+                                       }
+                                       ix += incX
+                               }
+                       }
+               } else {
+                       if incX == 1 {
+                               for i := n - 1; i >= 0; i-- {
+                                       if diag == blas.NonUnit {
+                                               x[i] *= a[i*lda+i]
+                                       }
+                                       if i > 0 {
+                                               x[i] += c128.DotuUnitary(a[i*lda:i*lda+i], x[:i])
+                                       }
+                               }
+                       } else {
+                               ix := kx + (n-1)*incX
+                               for i := n - 1; i >= 0; i-- {
+                                       if diag == blas.NonUnit {
+                                               x[ix] *= a[i*lda+i]
+                                       }
+                                       if i > 0 {
+                                               x[ix] += c128.DotuInc(a[i*lda:i*lda+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx))
+                                       }
+                                       ix -= incX
+                               }
+                       }
+               }
+               return
+       }
+
+       if trans == blas.Trans {
+               // Form x = A^T*x.
+               if uplo == blas.Upper {
+                       if incX == 1 {
+                               for i := n - 1; i >= 0; i-- {
+                                       xi := x[i]
+                                       if diag == blas.NonUnit {
+                                               x[i] *= a[i*lda+i]
+                                       }
+                                       if n-i-1 > 0 {
+                                               c128.AxpyUnitary(xi, a[i*lda+i+1:i*lda+n], x[i+1:n])
+                                       }
+                               }
+                       } else {
+                               ix := kx + (n-1)*incX
+                               for i := n - 1; i >= 0; i-- {
+                                       xi := x[ix]
+                                       if diag == blas.NonUnit {
+                                               x[ix] *= a[i*lda+i]
+                                       }
+                                       if n-i-1 > 0 {
+                                               c128.AxpyInc(xi, a[i*lda+i+1:i*lda+n], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(ix+incX))
+                                       }
+                                       ix -= incX
+                               }
+                       }
+               } else {
+                       if incX == 1 {
+                               for i := 0; i < n; i++ {
+                                       if i > 0 {
+                                               c128.AxpyUnitary(x[i], a[i*lda:i*lda+i], x[:i])
+                                       }
+                                       if diag == blas.NonUnit {
+                                               x[i] *= a[i*lda+i]
+                                       }
+                               }
+                       } else {
+                               ix := kx
+                               for i := 0; i < n; i++ {
+                                       if i > 0 {
+                                               c128.AxpyInc(x[ix], a[i*lda:i*lda+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx))
+                                       }
+                                       if diag == blas.NonUnit {
+                                               x[ix] *= a[i*lda+i]
+                                       }
+                                       ix += incX
+                               }
+                       }
+               }
+               return
+       }
+
+       // Form x = A^H*x.
+       if uplo == blas.Upper {
+               if incX == 1 {
+                       for i := n - 1; i >= 0; i-- {
+                               xi := x[i]
+                               if diag == blas.NonUnit {
+                                       x[i] *= cmplx.Conj(a[i*lda+i])
+                               }
+                               for j := i + 1; j < n; j++ {
+                                       x[j] += xi * cmplx.Conj(a[i*lda+j])
+                               }
+                       }
+               } else {
+                       ix := kx + (n-1)*incX
+                       for i := n - 1; i >= 0; i-- {
+                               xi := x[ix]
+                               if diag == blas.NonUnit {
+                                       x[ix] *= cmplx.Conj(a[i*lda+i])
+                               }
+                               jx := ix + incX
+                               for j := i + 1; j < n; j++ {
+                                       x[jx] += xi * cmplx.Conj(a[i*lda+j])
+                                       jx += incX
+                               }
+                               ix -= incX
+                       }
+               }
+       } else {
+               if incX == 1 {
+                       for i := 0; i < n; i++ {
+                               for j := 0; j < i; j++ {
+                                       x[j] += x[i] * cmplx.Conj(a[i*lda+j])
+                               }
+                               if diag == blas.NonUnit {
+                                       x[i] *= cmplx.Conj(a[i*lda+i])
+                               }
+                       }
+               } else {
+                       ix := kx
+                       for i := 0; i < n; i++ {
+                               jx := kx
+                               for j := 0; j < i; j++ {
+                                       x[jx] += x[ix] * cmplx.Conj(a[i*lda+j])
+                                       jx += incX
+                               }
+                               if diag == blas.NonUnit {
+                                       x[ix] *= cmplx.Conj(a[i*lda+i])
+                               }
+                               ix += incX
+                       }
+               }
+       }
+}
+
+// Ztrsv solves one of the systems of equations
+//  A*x = b     if trans == blas.NoTrans,
+//  A^T*x = b,  if trans == blas.Trans,
+//  A^H*x = b,  if trans == blas.ConjTrans,
+// where b and x are n element vectors and A is an n×n triangular matrix.
+//
+// On entry, x contains the values of b, and the solution is
+// stored in-place into x.
+//
+// No test for singularity or near-singularity is included in this
+// routine. Such tests must be performed before calling this routine.
+func (Implementation) Ztrsv(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, n int, a []complex128, lda int, x []complex128, incX int) {
+       if uplo != blas.Upper && uplo != blas.Lower {
+               panic(badUplo)
+       }
+       if trans != blas.NoTrans && trans != blas.Trans && trans != blas.ConjTrans {
+               panic(badTranspose)
+       }
+       if diag != blas.Unit && diag != blas.NonUnit {
+               panic(badDiag)
+       }
+       checkZMatrix('A', n, n, a, lda)
+       checkZVector('x', n, x, incX)
+
+       if n == 0 {
+               return
+       }
+
+       // Set up start index in X.
+       var kx int
+       if incX < 0 {
+               kx = (1 - n) * incX
+       }
+
+       // The elements of A are accessed sequentially with one pass through A.
+
+       if trans == blas.NoTrans {
+               // Form x = inv(A)*x.
+               if uplo == blas.Upper {
+                       if incX == 1 {
+                               for i := n - 1; i >= 0; i-- {
+                                       aii := a[i*lda+i]
+                                       if n-i-1 > 0 {
+                                               x[i] -= c128.DotuUnitary(x[i+1:n], a[i*lda+i+1:i*lda+n])
+                                       }
+                                       if diag == blas.NonUnit {
+                                               x[i] /= aii
+                                       }
+                               }
+                       } else {
+                               ix := kx + (n-1)*incX
+                               for i := n - 1; i >= 0; i-- {
+                                       aii := a[i*lda+i]
+                                       if n-i-1 > 0 {
+                                               x[ix] -= c128.DotuInc(x, a[i*lda+i+1:i*lda+n], uintptr(n-i-1), uintptr(incX), 1, uintptr(ix+incX), 0)
+                                       }
+                                       if diag == blas.NonUnit {
+                                               x[ix] /= aii
+                                       }
+                                       ix -= incX
+                               }
+                       }
+               } else {
+                       if incX == 1 {
+                               for i := 0; i < n; i++ {
+                                       if i > 0 {
+                                               x[i] -= c128.DotuUnitary(x[:i], a[i*lda:i*lda+i])
+                                       }
+                                       if diag == blas.NonUnit {
+                                               x[i] /= a[i*lda+i]
+                                       }
+                               }
+                       } else {
+                               ix := kx
+                               for i := 0; i < n; i++ {
+                                       if i > 0 {
+                                               x[ix] -= c128.DotuInc(x, a[i*lda:i*lda+i], uintptr(i), uintptr(incX), 1, uintptr(kx), 0)
+                                       }
+                                       if diag == blas.NonUnit {
+                                               x[ix] /= a[i*lda+i]
+                                       }
+                                       ix += incX
+                               }
+                       }
+               }
+               return
+       }
+
+       if trans == blas.Trans {
+               // Form x = inv(A^T)*x.
+               if uplo == blas.Upper {
+                       if incX == 1 {
+                               for j := 0; j < n; j++ {
+                                       if diag == blas.NonUnit {
+                                               x[j] /= a[j*lda+j]
+                                       }
+                                       if n-j-1 > 0 {
+                                               c128.AxpyUnitary(-x[j], a[j*lda+j+1:j*lda+n], x[j+1:n])
+                                       }
+                               }
+                       } else {
+                               jx := kx
+                               for j := 0; j < n; j++ {
+                                       if diag == blas.NonUnit {
+                                               x[jx] /= a[j*lda+j]
+                                       }
+                                       if n-j-1 > 0 {
+                                               c128.AxpyInc(-x[jx], a[j*lda+j+1:j*lda+n], x, uintptr(n-j-1), 1, uintptr(incX), 0, uintptr(jx+incX))
+                                       }
+                                       jx += incX
+                               }
+                       }
+               } else {
+                       if incX == 1 {
+                               for j := n - 1; j >= 0; j-- {
+                                       if diag == blas.NonUnit {
+                                               x[j] /= a[j*lda+j]
+                                       }
+                                       xj := x[j]
+                                       if j > 0 {
+                                               c128.AxpyUnitary(-xj, a[j*lda:j*lda+j], x[:j])
+                                       }
+                               }
+                       } else {
+                               jx := kx + (n-1)*incX
+                               for j := n - 1; j >= 0; j-- {
+                                       if diag == blas.NonUnit {
+                                               x[jx] /= a[j*lda+j]
+                                       }
+                                       if j > 0 {
+                                               c128.AxpyInc(-x[jx], a[j*lda:j*lda+j], x, uintptr(j), 1, uintptr(incX), 0, uintptr(kx))
+                                       }
+                                       jx -= incX
+                               }
+                       }
+               }
+               return
+       }
+
+       // Form x = inv(A^H)*x.
+       if uplo == blas.Upper {
+               if incX == 1 {
+                       for j := 0; j < n; j++ {
+                               if diag == blas.NonUnit {
+                                       x[j] /= cmplx.Conj(a[j*lda+j])
+                               }
+                               xj := x[j]
+                               for i := j + 1; i < n; i++ {
+                                       x[i] -= xj * cmplx.Conj(a[j*lda+i])
+                               }
+                       }
+               } else {
+                       jx := kx
+                       for j := 0; j < n; j++ {
+                               if diag == blas.NonUnit {
+                                       x[jx] /= cmplx.Conj(a[j*lda+j])
+                               }
+                               xj := x[jx]
+                               ix := jx + incX
+                               for i := j + 1; i < n; i++ {
+                                       x[ix] -= xj * cmplx.Conj(a[j*lda+i])
+                                       ix += incX
+                               }
+                               jx += incX
+                       }
+               }
+       } else {
+               if incX == 1 {
+                       for j := n - 1; j >= 0; j-- {
+                               if diag == blas.NonUnit {
+                                       x[j] /= cmplx.Conj(a[j*lda+j])
+                               }
+                               xj := x[j]
+                               for i := 0; i < j; i++ {
+                                       x[i] -= xj * cmplx.Conj(a[j*lda+i])
+                               }
+                       }
+               } else {
+                       jx := kx + (n-1)*incX
+                       for j := n - 1; j >= 0; j-- {
+                               if diag == blas.NonUnit {
+                                       x[jx] /= cmplx.Conj(a[j*lda+j])
+                               }
+                               xj := x[jx]
+                               ix := kx
+                               for i := 0; i < j; i++ {
+                                       x[ix] -= xj * cmplx.Conj(a[j*lda+i])
+                                       ix += incX
+                               }
+                               jx -= incX
+                       }
+               }
+       }
+}