OSDN Git Service

test (#52)
[bytom/vapor.git] / vendor / gonum.org / v1 / gonum / blas / gonum / level3double.go
1 // Copyright ©2014 The Gonum Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package gonum
6
7 import (
8         "gonum.org/v1/gonum/blas"
9         "gonum.org/v1/gonum/internal/asm/f64"
10 )
11
12 var _ blas.Float64Level3 = Implementation{}
13
14 // Dtrsm solves
15 //  A * X = alpha * B,   if tA == blas.NoTrans side == blas.Left,
16 //  A^T * X = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Left,
17 //  X * A = alpha * B,   if tA == blas.NoTrans side == blas.Right,
18 //  X * A^T = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Right,
19 // where A is an n×n or m×m triangular matrix, X is an m×n matrix, and alpha is a
20 // scalar.
21 //
22 // At entry to the function, X contains the values of B, and the result is
23 // stored in place into X.
24 //
25 // No check is made that A is invertible.
26 func (Implementation) Dtrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) {
27         if s != blas.Left && s != blas.Right {
28                 panic(badSide)
29         }
30         if ul != blas.Lower && ul != blas.Upper {
31                 panic(badUplo)
32         }
33         if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
34                 panic(badTranspose)
35         }
36         if d != blas.NonUnit && d != blas.Unit {
37                 panic(badDiag)
38         }
39         if m < 0 {
40                 panic(mLT0)
41         }
42         if n < 0 {
43                 panic(nLT0)
44         }
45         if ldb < n {
46                 panic(badLdB)
47         }
48         var k int
49         if s == blas.Left {
50                 k = m
51         } else {
52                 k = n
53         }
54         if lda*(k-1)+k > len(a) || lda < max(1, k) {
55                 panic(badLdA)
56         }
57         if ldb*(m-1)+n > len(b) || ldb < max(1, n) {
58                 panic(badLdB)
59         }
60
61         if m == 0 || n == 0 {
62                 return
63         }
64
65         if alpha == 0 {
66                 for i := 0; i < m; i++ {
67                         btmp := b[i*ldb : i*ldb+n]
68                         for j := range btmp {
69                                 btmp[j] = 0
70                         }
71                 }
72                 return
73         }
74         nonUnit := d == blas.NonUnit
75         if s == blas.Left {
76                 if tA == blas.NoTrans {
77                         if ul == blas.Upper {
78                                 for i := m - 1; i >= 0; i-- {
79                                         btmp := b[i*ldb : i*ldb+n]
80                                         if alpha != 1 {
81                                                 for j := range btmp {
82                                                         btmp[j] *= alpha
83                                                 }
84                                         }
85                                         for ka, va := range a[i*lda+i+1 : i*lda+m] {
86                                                 k := ka + i + 1
87                                                 if va != 0 {
88                                                         f64.AxpyUnitaryTo(btmp, -va, b[k*ldb:k*ldb+n], btmp)
89                                                 }
90                                         }
91                                         if nonUnit {
92                                                 tmp := 1 / a[i*lda+i]
93                                                 for j := 0; j < n; j++ {
94                                                         btmp[j] *= tmp
95                                                 }
96                                         }
97                                 }
98                                 return
99                         }
100                         for i := 0; i < m; i++ {
101                                 btmp := b[i*ldb : i*ldb+n]
102                                 if alpha != 1 {
103                                         for j := 0; j < n; j++ {
104                                                 btmp[j] *= alpha
105                                         }
106                                 }
107                                 for k, va := range a[i*lda : i*lda+i] {
108                                         if va != 0 {
109                                                 f64.AxpyUnitaryTo(btmp, -va, b[k*ldb:k*ldb+n], btmp)
110                                         }
111                                 }
112                                 if nonUnit {
113                                         tmp := 1 / a[i*lda+i]
114                                         for j := 0; j < n; j++ {
115                                                 btmp[j] *= tmp
116                                         }
117                                 }
118                         }
119                         return
120                 }
121                 // Cases where a is transposed
122                 if ul == blas.Upper {
123                         for k := 0; k < m; k++ {
124                                 btmpk := b[k*ldb : k*ldb+n]
125                                 if nonUnit {
126                                         tmp := 1 / a[k*lda+k]
127                                         for j := 0; j < n; j++ {
128                                                 btmpk[j] *= tmp
129                                         }
130                                 }
131                                 for ia, va := range a[k*lda+k+1 : k*lda+m] {
132                                         i := ia + k + 1
133                                         if va != 0 {
134                                                 btmp := b[i*ldb : i*ldb+n]
135                                                 f64.AxpyUnitaryTo(btmp, -va, btmpk, btmp)
136                                         }
137                                 }
138                                 if alpha != 1 {
139                                         for j := 0; j < n; j++ {
140                                                 btmpk[j] *= alpha
141                                         }
142                                 }
143                         }
144                         return
145                 }
146                 for k := m - 1; k >= 0; k-- {
147                         btmpk := b[k*ldb : k*ldb+n]
148                         if nonUnit {
149                                 tmp := 1 / a[k*lda+k]
150                                 for j := 0; j < n; j++ {
151                                         btmpk[j] *= tmp
152                                 }
153                         }
154                         for i, va := range a[k*lda : k*lda+k] {
155                                 if va != 0 {
156                                         btmp := b[i*ldb : i*ldb+n]
157                                         f64.AxpyUnitaryTo(btmp, -va, btmpk, btmp)
158                                 }
159                         }
160                         if alpha != 1 {
161                                 for j := 0; j < n; j++ {
162                                         btmpk[j] *= alpha
163                                 }
164                         }
165                 }
166                 return
167         }
168         // Cases where a is to the right of X.
169         if tA == blas.NoTrans {
170                 if ul == blas.Upper {
171                         for i := 0; i < m; i++ {
172                                 btmp := b[i*ldb : i*ldb+n]
173                                 if alpha != 1 {
174                                         for j := 0; j < n; j++ {
175                                                 btmp[j] *= alpha
176                                         }
177                                 }
178                                 for k, vb := range btmp {
179                                         if vb != 0 {
180                                                 if btmp[k] != 0 {
181                                                         if nonUnit {
182                                                                 btmp[k] /= a[k*lda+k]
183                                                         }
184                                                         btmpk := btmp[k+1 : n]
185                                                         f64.AxpyUnitaryTo(btmpk, -btmp[k], a[k*lda+k+1:k*lda+n], btmpk)
186                                                 }
187                                         }
188                                 }
189                         }
190                         return
191                 }
192                 for i := 0; i < m; i++ {
193                         btmp := b[i*lda : i*lda+n]
194                         if alpha != 1 {
195                                 for j := 0; j < n; j++ {
196                                         btmp[j] *= alpha
197                                 }
198                         }
199                         for k := n - 1; k >= 0; k-- {
200                                 if btmp[k] != 0 {
201                                         if nonUnit {
202                                                 btmp[k] /= a[k*lda+k]
203                                         }
204                                         f64.AxpyUnitaryTo(btmp, -btmp[k], a[k*lda:k*lda+k], btmp)
205                                 }
206                         }
207                 }
208                 return
209         }
210         // Cases where a is transposed.
211         if ul == blas.Upper {
212                 for i := 0; i < m; i++ {
213                         btmp := b[i*lda : i*lda+n]
214                         for j := n - 1; j >= 0; j-- {
215                                 tmp := alpha*btmp[j] - f64.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:])
216                                 if nonUnit {
217                                         tmp /= a[j*lda+j]
218                                 }
219                                 btmp[j] = tmp
220                         }
221                 }
222                 return
223         }
224         for i := 0; i < m; i++ {
225                 btmp := b[i*lda : i*lda+n]
226                 for j := 0; j < n; j++ {
227                         tmp := alpha*btmp[j] - f64.DotUnitary(a[j*lda:j*lda+j], btmp)
228                         if nonUnit {
229                                 tmp /= a[j*lda+j]
230                         }
231                         btmp[j] = tmp
232                 }
233         }
234 }
235
236 // Dsymm performs one of
237 //  C = alpha * A * B + beta * C, if side == blas.Left,
238 //  C = alpha * B * A + beta * C, if side == blas.Right,
239 // where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha
240 // is a scalar.
241 func (Implementation) Dsymm(s blas.Side, ul blas.Uplo, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) {
242         if s != blas.Right && s != blas.Left {
243                 panic("goblas: bad side")
244         }
245         if ul != blas.Lower && ul != blas.Upper {
246                 panic(badUplo)
247         }
248         if m < 0 {
249                 panic(mLT0)
250         }
251         if n < 0 {
252                 panic(nLT0)
253         }
254         var k int
255         if s == blas.Left {
256                 k = m
257         } else {
258                 k = n
259         }
260         if lda*(k-1)+k > len(a) || lda < max(1, k) {
261                 panic(badLdA)
262         }
263         if ldb*(m-1)+n > len(b) || ldb < max(1, n) {
264                 panic(badLdB)
265         }
266         if ldc*(m-1)+n > len(c) || ldc < max(1, n) {
267                 panic(badLdC)
268         }
269         if m == 0 || n == 0 {
270                 return
271         }
272         if alpha == 0 && beta == 1 {
273                 return
274         }
275         if alpha == 0 {
276                 if beta == 0 {
277                         for i := 0; i < m; i++ {
278                                 ctmp := c[i*ldc : i*ldc+n]
279                                 for j := range ctmp {
280                                         ctmp[j] = 0
281                                 }
282                         }
283                         return
284                 }
285                 for i := 0; i < m; i++ {
286                         ctmp := c[i*ldc : i*ldc+n]
287                         for j := 0; j < n; j++ {
288                                 ctmp[j] *= beta
289                         }
290                 }
291                 return
292         }
293
294         isUpper := ul == blas.Upper
295         if s == blas.Left {
296                 for i := 0; i < m; i++ {
297                         atmp := alpha * a[i*lda+i]
298                         btmp := b[i*ldb : i*ldb+n]
299                         ctmp := c[i*ldc : i*ldc+n]
300                         for j, v := range btmp {
301                                 ctmp[j] *= beta
302                                 ctmp[j] += atmp * v
303                         }
304
305                         for k := 0; k < i; k++ {
306                                 var atmp float64
307                                 if isUpper {
308                                         atmp = a[k*lda+i]
309                                 } else {
310                                         atmp = a[i*lda+k]
311                                 }
312                                 atmp *= alpha
313                                 ctmp := c[i*ldc : i*ldc+n]
314                                 f64.AxpyUnitaryTo(ctmp, atmp, b[k*ldb:k*ldb+n], ctmp)
315                         }
316                         for k := i + 1; k < m; k++ {
317                                 var atmp float64
318                                 if isUpper {
319                                         atmp = a[i*lda+k]
320                                 } else {
321                                         atmp = a[k*lda+i]
322                                 }
323                                 atmp *= alpha
324                                 ctmp := c[i*ldc : i*ldc+n]
325                                 f64.AxpyUnitaryTo(ctmp, atmp, b[k*ldb:k*ldb+n], ctmp)
326                         }
327                 }
328                 return
329         }
330         if isUpper {
331                 for i := 0; i < m; i++ {
332                         for j := n - 1; j >= 0; j-- {
333                                 tmp := alpha * b[i*ldb+j]
334                                 var tmp2 float64
335                                 atmp := a[j*lda+j+1 : j*lda+n]
336                                 btmp := b[i*ldb+j+1 : i*ldb+n]
337                                 ctmp := c[i*ldc+j+1 : i*ldc+n]
338                                 for k, v := range atmp {
339                                         ctmp[k] += tmp * v
340                                         tmp2 += btmp[k] * v
341                                 }
342                                 c[i*ldc+j] *= beta
343                                 c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2
344                         }
345                 }
346                 return
347         }
348         for i := 0; i < m; i++ {
349                 for j := 0; j < n; j++ {
350                         tmp := alpha * b[i*ldb+j]
351                         var tmp2 float64
352                         atmp := a[j*lda : j*lda+j]
353                         btmp := b[i*ldb : i*ldb+j]
354                         ctmp := c[i*ldc : i*ldc+j]
355                         for k, v := range atmp {
356                                 ctmp[k] += tmp * v
357                                 tmp2 += btmp[k] * v
358                         }
359                         c[i*ldc+j] *= beta
360                         c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2
361                 }
362         }
363 }
364
365 // Dsyrk performs the symmetric rank-k operation
366 //  C = alpha * A * A^T + beta*C
367 // C is an n×n symmetric matrix. A is an n×k matrix if tA == blas.NoTrans, and
368 // a k×n matrix otherwise. alpha and beta are scalars.
369 func (Implementation) Dsyrk(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float64, a []float64, lda int, beta float64, c []float64, ldc int) {
370         if ul != blas.Lower && ul != blas.Upper {
371                 panic(badUplo)
372         }
373         if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans {
374                 panic(badTranspose)
375         }
376         if n < 0 {
377                 panic(nLT0)
378         }
379         if k < 0 {
380                 panic(kLT0)
381         }
382         if ldc < n {
383                 panic(badLdC)
384         }
385         var row, col int
386         if tA == blas.NoTrans {
387                 row, col = n, k
388         } else {
389                 row, col = k, n
390         }
391         if lda*(row-1)+col > len(a) || lda < max(1, col) {
392                 panic(badLdA)
393         }
394         if ldc*(n-1)+n > len(c) || ldc < max(1, n) {
395                 panic(badLdC)
396         }
397         if alpha == 0 {
398                 if beta == 0 {
399                         if ul == blas.Upper {
400                                 for i := 0; i < n; i++ {
401                                         ctmp := c[i*ldc+i : i*ldc+n]
402                                         for j := range ctmp {
403                                                 ctmp[j] = 0
404                                         }
405                                 }
406                                 return
407                         }
408                         for i := 0; i < n; i++ {
409                                 ctmp := c[i*ldc : i*ldc+i+1]
410                                 for j := range ctmp {
411                                         ctmp[j] = 0
412                                 }
413                         }
414                         return
415                 }
416                 if ul == blas.Upper {
417                         for i := 0; i < n; i++ {
418                                 ctmp := c[i*ldc+i : i*ldc+n]
419                                 for j := range ctmp {
420                                         ctmp[j] *= beta
421                                 }
422                         }
423                         return
424                 }
425                 for i := 0; i < n; i++ {
426                         ctmp := c[i*ldc : i*ldc+i+1]
427                         for j := range ctmp {
428                                 ctmp[j] *= beta
429                         }
430                 }
431                 return
432         }
433         if tA == blas.NoTrans {
434                 if ul == blas.Upper {
435                         for i := 0; i < n; i++ {
436                                 ctmp := c[i*ldc+i : i*ldc+n]
437                                 atmp := a[i*lda : i*lda+k]
438                                 for jc, vc := range ctmp {
439                                         j := jc + i
440                                         ctmp[jc] = vc*beta + alpha*f64.DotUnitary(atmp, a[j*lda:j*lda+k])
441                                 }
442                         }
443                         return
444                 }
445                 for i := 0; i < n; i++ {
446                         atmp := a[i*lda : i*lda+k]
447                         for j, vc := range c[i*ldc : i*ldc+i+1] {
448                                 c[i*ldc+j] = vc*beta + alpha*f64.DotUnitary(a[j*lda:j*lda+k], atmp)
449                         }
450                 }
451                 return
452         }
453         // Cases where a is transposed.
454         if ul == blas.Upper {
455                 for i := 0; i < n; i++ {
456                         ctmp := c[i*ldc+i : i*ldc+n]
457                         if beta != 1 {
458                                 for j := range ctmp {
459                                         ctmp[j] *= beta
460                                 }
461                         }
462                         for l := 0; l < k; l++ {
463                                 tmp := alpha * a[l*lda+i]
464                                 if tmp != 0 {
465                                         f64.AxpyUnitaryTo(ctmp, tmp, a[l*lda+i:l*lda+n], ctmp)
466                                 }
467                         }
468                 }
469                 return
470         }
471         for i := 0; i < n; i++ {
472                 ctmp := c[i*ldc : i*ldc+i+1]
473                 if beta != 0 {
474                         for j := range ctmp {
475                                 ctmp[j] *= beta
476                         }
477                 }
478                 for l := 0; l < k; l++ {
479                         tmp := alpha * a[l*lda+i]
480                         if tmp != 0 {
481                                 f64.AxpyUnitaryTo(ctmp, tmp, a[l*lda:l*lda+i+1], ctmp)
482                         }
483                 }
484         }
485 }
486
487 // Dsyr2k performs the symmetric rank 2k operation
488 //  C = alpha * A * B^T + alpha * B * A^T + beta * C
489 // where C is an n×n symmetric matrix. A and B are n×k matrices if
490 // tA == NoTrans and k×n otherwise. alpha and beta are scalars.
491 func (Implementation) Dsyr2k(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) {
492         if ul != blas.Lower && ul != blas.Upper {
493                 panic(badUplo)
494         }
495         if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans {
496                 panic(badTranspose)
497         }
498         if n < 0 {
499                 panic(nLT0)
500         }
501         if k < 0 {
502                 panic(kLT0)
503         }
504         if ldc < n {
505                 panic(badLdC)
506         }
507         var row, col int
508         if tA == blas.NoTrans {
509                 row, col = n, k
510         } else {
511                 row, col = k, n
512         }
513         if lda*(row-1)+col > len(a) || lda < max(1, col) {
514                 panic(badLdA)
515         }
516         if ldb*(row-1)+col > len(b) || ldb < max(1, col) {
517                 panic(badLdB)
518         }
519         if ldc*(n-1)+n > len(c) || ldc < max(1, n) {
520                 panic(badLdC)
521         }
522         if alpha == 0 {
523                 if beta == 0 {
524                         if ul == blas.Upper {
525                                 for i := 0; i < n; i++ {
526                                         ctmp := c[i*ldc+i : i*ldc+n]
527                                         for j := range ctmp {
528                                                 ctmp[j] = 0
529                                         }
530                                 }
531                                 return
532                         }
533                         for i := 0; i < n; i++ {
534                                 ctmp := c[i*ldc : i*ldc+i+1]
535                                 for j := range ctmp {
536                                         ctmp[j] = 0
537                                 }
538                         }
539                         return
540                 }
541                 if ul == blas.Upper {
542                         for i := 0; i < n; i++ {
543                                 ctmp := c[i*ldc+i : i*ldc+n]
544                                 for j := range ctmp {
545                                         ctmp[j] *= beta
546                                 }
547                         }
548                         return
549                 }
550                 for i := 0; i < n; i++ {
551                         ctmp := c[i*ldc : i*ldc+i+1]
552                         for j := range ctmp {
553                                 ctmp[j] *= beta
554                         }
555                 }
556                 return
557         }
558         if tA == blas.NoTrans {
559                 if ul == blas.Upper {
560                         for i := 0; i < n; i++ {
561                                 atmp := a[i*lda : i*lda+k]
562                                 btmp := b[i*ldb : i*ldb+k]
563                                 ctmp := c[i*ldc+i : i*ldc+n]
564                                 for jc := range ctmp {
565                                         j := i + jc
566                                         var tmp1, tmp2 float64
567                                         binner := b[j*ldb : j*ldb+k]
568                                         for l, v := range a[j*lda : j*lda+k] {
569                                                 tmp1 += v * btmp[l]
570                                                 tmp2 += atmp[l] * binner[l]
571                                         }
572                                         ctmp[jc] *= beta
573                                         ctmp[jc] += alpha * (tmp1 + tmp2)
574                                 }
575                         }
576                         return
577                 }
578                 for i := 0; i < n; i++ {
579                         atmp := a[i*lda : i*lda+k]
580                         btmp := b[i*ldb : i*ldb+k]
581                         ctmp := c[i*ldc : i*ldc+i+1]
582                         for j := 0; j <= i; j++ {
583                                 var tmp1, tmp2 float64
584                                 binner := b[j*ldb : j*ldb+k]
585                                 for l, v := range a[j*lda : j*lda+k] {
586                                         tmp1 += v * btmp[l]
587                                         tmp2 += atmp[l] * binner[l]
588                                 }
589                                 ctmp[j] *= beta
590                                 ctmp[j] += alpha * (tmp1 + tmp2)
591                         }
592                 }
593                 return
594         }
595         if ul == blas.Upper {
596                 for i := 0; i < n; i++ {
597                         ctmp := c[i*ldc+i : i*ldc+n]
598                         if beta != 1 {
599                                 for j := range ctmp {
600                                         ctmp[j] *= beta
601                                 }
602                         }
603                         for l := 0; l < k; l++ {
604                                 tmp1 := alpha * b[l*lda+i]
605                                 tmp2 := alpha * a[l*lda+i]
606                                 btmp := b[l*ldb+i : l*ldb+n]
607                                 if tmp1 != 0 || tmp2 != 0 {
608                                         for j, v := range a[l*lda+i : l*lda+n] {
609                                                 ctmp[j] += v*tmp1 + btmp[j]*tmp2
610                                         }
611                                 }
612                         }
613                 }
614                 return
615         }
616         for i := 0; i < n; i++ {
617                 ctmp := c[i*ldc : i*ldc+i+1]
618                 if beta != 1 {
619                         for j := range ctmp {
620                                 ctmp[j] *= beta
621                         }
622                 }
623                 for l := 0; l < k; l++ {
624                         tmp1 := alpha * b[l*lda+i]
625                         tmp2 := alpha * a[l*lda+i]
626                         btmp := b[l*ldb : l*ldb+i+1]
627                         if tmp1 != 0 || tmp2 != 0 {
628                                 for j, v := range a[l*lda : l*lda+i+1] {
629                                         ctmp[j] += v*tmp1 + btmp[j]*tmp2
630                                 }
631                         }
632                 }
633         }
634 }
635
636 // Dtrmm performs
637 //  B = alpha * A * B,   if tA == blas.NoTrans and side == blas.Left,
638 //  B = alpha * A^T * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Left,
639 //  B = alpha * B * A,   if tA == blas.NoTrans and side == blas.Right,
640 //  B = alpha * B * A^T, if tA == blas.Trans or blas.ConjTrans, and side == blas.Right,
641 // where A is an n×n or m×m triangular matrix, and B is an m×n matrix.
642 func (Implementation) Dtrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) {
643         if s != blas.Left && s != blas.Right {
644                 panic(badSide)
645         }
646         if ul != blas.Lower && ul != blas.Upper {
647                 panic(badUplo)
648         }
649         if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
650                 panic(badTranspose)
651         }
652         if d != blas.NonUnit && d != blas.Unit {
653                 panic(badDiag)
654         }
655         if m < 0 {
656                 panic(mLT0)
657         }
658         if n < 0 {
659                 panic(nLT0)
660         }
661         var k int
662         if s == blas.Left {
663                 k = m
664         } else {
665                 k = n
666         }
667         if lda*(k-1)+k > len(a) || lda < max(1, k) {
668                 panic(badLdA)
669         }
670         if ldb*(m-1)+n > len(b) || ldb < max(1, n) {
671                 panic(badLdB)
672         }
673         if alpha == 0 {
674                 for i := 0; i < m; i++ {
675                         btmp := b[i*ldb : i*ldb+n]
676                         for j := range btmp {
677                                 btmp[j] = 0
678                         }
679                 }
680                 return
681         }
682
683         nonUnit := d == blas.NonUnit
684         if s == blas.Left {
685                 if tA == blas.NoTrans {
686                         if ul == blas.Upper {
687                                 for i := 0; i < m; i++ {
688                                         tmp := alpha
689                                         if nonUnit {
690                                                 tmp *= a[i*lda+i]
691                                         }
692                                         btmp := b[i*ldb : i*ldb+n]
693                                         for j := range btmp {
694                                                 btmp[j] *= tmp
695                                         }
696                                         for ka, va := range a[i*lda+i+1 : i*lda+m] {
697                                                 k := ka + i + 1
698                                                 tmp := alpha * va
699                                                 if tmp != 0 {
700                                                         f64.AxpyUnitaryTo(btmp, tmp, b[k*ldb:k*ldb+n], btmp)
701                                                 }
702                                         }
703                                 }
704                                 return
705                         }
706                         for i := m - 1; i >= 0; i-- {
707                                 tmp := alpha
708                                 if nonUnit {
709                                         tmp *= a[i*lda+i]
710                                 }
711                                 btmp := b[i*ldb : i*ldb+n]
712                                 for j := range btmp {
713                                         btmp[j] *= tmp
714                                 }
715                                 for k, va := range a[i*lda : i*lda+i] {
716                                         tmp := alpha * va
717                                         if tmp != 0 {
718                                                 f64.AxpyUnitaryTo(btmp, tmp, b[k*ldb:k*ldb+n], btmp)
719                                         }
720                                 }
721                         }
722                         return
723                 }
724                 // Cases where a is transposed.
725                 if ul == blas.Upper {
726                         for k := m - 1; k >= 0; k-- {
727                                 btmpk := b[k*ldb : k*ldb+n]
728                                 for ia, va := range a[k*lda+k+1 : k*lda+m] {
729                                         i := ia + k + 1
730                                         btmp := b[i*ldb : i*ldb+n]
731                                         tmp := alpha * va
732                                         if tmp != 0 {
733                                                 f64.AxpyUnitaryTo(btmp, tmp, btmpk, btmp)
734                                         }
735                                 }
736                                 tmp := alpha
737                                 if nonUnit {
738                                         tmp *= a[k*lda+k]
739                                 }
740                                 if tmp != 1 {
741                                         for j := 0; j < n; j++ {
742                                                 btmpk[j] *= tmp
743                                         }
744                                 }
745                         }
746                         return
747                 }
748                 for k := 0; k < m; k++ {
749                         btmpk := b[k*ldb : k*ldb+n]
750                         for i, va := range a[k*lda : k*lda+k] {
751                                 btmp := b[i*ldb : i*ldb+n]
752                                 tmp := alpha * va
753                                 if tmp != 0 {
754                                         f64.AxpyUnitaryTo(btmp, tmp, btmpk, btmp)
755                                 }
756                         }
757                         tmp := alpha
758                         if nonUnit {
759                                 tmp *= a[k*lda+k]
760                         }
761                         if tmp != 1 {
762                                 for j := 0; j < n; j++ {
763                                         btmpk[j] *= tmp
764                                 }
765                         }
766                 }
767                 return
768         }
769         // Cases where a is on the right
770         if tA == blas.NoTrans {
771                 if ul == blas.Upper {
772                         for i := 0; i < m; i++ {
773                                 btmp := b[i*ldb : i*ldb+n]
774                                 for k := n - 1; k >= 0; k-- {
775                                         tmp := alpha * btmp[k]
776                                         if tmp != 0 {
777                                                 btmp[k] = tmp
778                                                 if nonUnit {
779                                                         btmp[k] *= a[k*lda+k]
780                                                 }
781                                                 for ja, v := range a[k*lda+k+1 : k*lda+n] {
782                                                         j := ja + k + 1
783                                                         btmp[j] += tmp * v
784                                                 }
785                                         }
786                                 }
787                         }
788                         return
789                 }
790                 for i := 0; i < m; i++ {
791                         btmp := b[i*ldb : i*ldb+n]
792                         for k := 0; k < n; k++ {
793                                 tmp := alpha * btmp[k]
794                                 if tmp != 0 {
795                                         btmp[k] = tmp
796                                         if nonUnit {
797                                                 btmp[k] *= a[k*lda+k]
798                                         }
799                                         f64.AxpyUnitaryTo(btmp, tmp, a[k*lda:k*lda+k], btmp)
800                                 }
801                         }
802                 }
803                 return
804         }
805         // Cases where a is transposed.
806         if ul == blas.Upper {
807                 for i := 0; i < m; i++ {
808                         btmp := b[i*ldb : i*ldb+n]
809                         for j, vb := range btmp {
810                                 tmp := vb
811                                 if nonUnit {
812                                         tmp *= a[j*lda+j]
813                                 }
814                                 tmp += f64.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:n])
815                                 btmp[j] = alpha * tmp
816                         }
817                 }
818                 return
819         }
820         for i := 0; i < m; i++ {
821                 btmp := b[i*ldb : i*ldb+n]
822                 for j := n - 1; j >= 0; j-- {
823                         tmp := btmp[j]
824                         if nonUnit {
825                                 tmp *= a[j*lda+j]
826                         }
827                         tmp += f64.DotUnitary(a[j*lda:j*lda+j], btmp[:j])
828                         btmp[j] = alpha * tmp
829                 }
830         }
831 }