OSDN Git Service

test (#52)
[bytom/vapor.git] / vendor / gonum.org / v1 / gonum / blas / gonum / level3single.go
1 // Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
2
3 // Copyright ©2014 The Gonum Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file.
6
7 package gonum
8
9 import (
10         "gonum.org/v1/gonum/blas"
11         "gonum.org/v1/gonum/internal/asm/f32"
12 )
13
14 var _ blas.Float32Level3 = Implementation{}
15
16 // Strsm solves
17 //  A * X = alpha * B,   if tA == blas.NoTrans side == blas.Left,
18 //  A^T * X = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Left,
19 //  X * A = alpha * B,   if tA == blas.NoTrans side == blas.Right,
20 //  X * A^T = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Right,
21 // where A is an n×n or m×m triangular matrix, X is an m×n matrix, and alpha is a
22 // scalar.
23 //
24 // At entry to the function, X contains the values of B, and the result is
25 // stored in place into X.
26 //
27 // No check is made that A is invertible.
28 //
29 // Float32 implementations are autogenerated and not directly tested.
30 func (Implementation) Strsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) {
31         if s != blas.Left && s != blas.Right {
32                 panic(badSide)
33         }
34         if ul != blas.Lower && ul != blas.Upper {
35                 panic(badUplo)
36         }
37         if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
38                 panic(badTranspose)
39         }
40         if d != blas.NonUnit && d != blas.Unit {
41                 panic(badDiag)
42         }
43         if m < 0 {
44                 panic(mLT0)
45         }
46         if n < 0 {
47                 panic(nLT0)
48         }
49         if ldb < n {
50                 panic(badLdB)
51         }
52         var k int
53         if s == blas.Left {
54                 k = m
55         } else {
56                 k = n
57         }
58         if lda*(k-1)+k > len(a) || lda < max(1, k) {
59                 panic(badLdA)
60         }
61         if ldb*(m-1)+n > len(b) || ldb < max(1, n) {
62                 panic(badLdB)
63         }
64
65         if m == 0 || n == 0 {
66                 return
67         }
68
69         if alpha == 0 {
70                 for i := 0; i < m; i++ {
71                         btmp := b[i*ldb : i*ldb+n]
72                         for j := range btmp {
73                                 btmp[j] = 0
74                         }
75                 }
76                 return
77         }
78         nonUnit := d == blas.NonUnit
79         if s == blas.Left {
80                 if tA == blas.NoTrans {
81                         if ul == blas.Upper {
82                                 for i := m - 1; i >= 0; i-- {
83                                         btmp := b[i*ldb : i*ldb+n]
84                                         if alpha != 1 {
85                                                 for j := range btmp {
86                                                         btmp[j] *= alpha
87                                                 }
88                                         }
89                                         for ka, va := range a[i*lda+i+1 : i*lda+m] {
90                                                 k := ka + i + 1
91                                                 if va != 0 {
92                                                         f32.AxpyUnitaryTo(btmp, -va, b[k*ldb:k*ldb+n], btmp)
93                                                 }
94                                         }
95                                         if nonUnit {
96                                                 tmp := 1 / a[i*lda+i]
97                                                 for j := 0; j < n; j++ {
98                                                         btmp[j] *= tmp
99                                                 }
100                                         }
101                                 }
102                                 return
103                         }
104                         for i := 0; i < m; i++ {
105                                 btmp := b[i*ldb : i*ldb+n]
106                                 if alpha != 1 {
107                                         for j := 0; j < n; j++ {
108                                                 btmp[j] *= alpha
109                                         }
110                                 }
111                                 for k, va := range a[i*lda : i*lda+i] {
112                                         if va != 0 {
113                                                 f32.AxpyUnitaryTo(btmp, -va, b[k*ldb:k*ldb+n], btmp)
114                                         }
115                                 }
116                                 if nonUnit {
117                                         tmp := 1 / a[i*lda+i]
118                                         for j := 0; j < n; j++ {
119                                                 btmp[j] *= tmp
120                                         }
121                                 }
122                         }
123                         return
124                 }
125                 // Cases where a is transposed
126                 if ul == blas.Upper {
127                         for k := 0; k < m; k++ {
128                                 btmpk := b[k*ldb : k*ldb+n]
129                                 if nonUnit {
130                                         tmp := 1 / a[k*lda+k]
131                                         for j := 0; j < n; j++ {
132                                                 btmpk[j] *= tmp
133                                         }
134                                 }
135                                 for ia, va := range a[k*lda+k+1 : k*lda+m] {
136                                         i := ia + k + 1
137                                         if va != 0 {
138                                                 btmp := b[i*ldb : i*ldb+n]
139                                                 f32.AxpyUnitaryTo(btmp, -va, btmpk, btmp)
140                                         }
141                                 }
142                                 if alpha != 1 {
143                                         for j := 0; j < n; j++ {
144                                                 btmpk[j] *= alpha
145                                         }
146                                 }
147                         }
148                         return
149                 }
150                 for k := m - 1; k >= 0; k-- {
151                         btmpk := b[k*ldb : k*ldb+n]
152                         if nonUnit {
153                                 tmp := 1 / a[k*lda+k]
154                                 for j := 0; j < n; j++ {
155                                         btmpk[j] *= tmp
156                                 }
157                         }
158                         for i, va := range a[k*lda : k*lda+k] {
159                                 if va != 0 {
160                                         btmp := b[i*ldb : i*ldb+n]
161                                         f32.AxpyUnitaryTo(btmp, -va, btmpk, btmp)
162                                 }
163                         }
164                         if alpha != 1 {
165                                 for j := 0; j < n; j++ {
166                                         btmpk[j] *= alpha
167                                 }
168                         }
169                 }
170                 return
171         }
172         // Cases where a is to the right of X.
173         if tA == blas.NoTrans {
174                 if ul == blas.Upper {
175                         for i := 0; i < m; i++ {
176                                 btmp := b[i*ldb : i*ldb+n]
177                                 if alpha != 1 {
178                                         for j := 0; j < n; j++ {
179                                                 btmp[j] *= alpha
180                                         }
181                                 }
182                                 for k, vb := range btmp {
183                                         if vb != 0 {
184                                                 if btmp[k] != 0 {
185                                                         if nonUnit {
186                                                                 btmp[k] /= a[k*lda+k]
187                                                         }
188                                                         btmpk := btmp[k+1 : n]
189                                                         f32.AxpyUnitaryTo(btmpk, -btmp[k], a[k*lda+k+1:k*lda+n], btmpk)
190                                                 }
191                                         }
192                                 }
193                         }
194                         return
195                 }
196                 for i := 0; i < m; i++ {
197                         btmp := b[i*lda : i*lda+n]
198                         if alpha != 1 {
199                                 for j := 0; j < n; j++ {
200                                         btmp[j] *= alpha
201                                 }
202                         }
203                         for k := n - 1; k >= 0; k-- {
204                                 if btmp[k] != 0 {
205                                         if nonUnit {
206                                                 btmp[k] /= a[k*lda+k]
207                                         }
208                                         f32.AxpyUnitaryTo(btmp, -btmp[k], a[k*lda:k*lda+k], btmp)
209                                 }
210                         }
211                 }
212                 return
213         }
214         // Cases where a is transposed.
215         if ul == blas.Upper {
216                 for i := 0; i < m; i++ {
217                         btmp := b[i*lda : i*lda+n]
218                         for j := n - 1; j >= 0; j-- {
219                                 tmp := alpha*btmp[j] - f32.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:])
220                                 if nonUnit {
221                                         tmp /= a[j*lda+j]
222                                 }
223                                 btmp[j] = tmp
224                         }
225                 }
226                 return
227         }
228         for i := 0; i < m; i++ {
229                 btmp := b[i*lda : i*lda+n]
230                 for j := 0; j < n; j++ {
231                         tmp := alpha*btmp[j] - f32.DotUnitary(a[j*lda:j*lda+j], btmp)
232                         if nonUnit {
233                                 tmp /= a[j*lda+j]
234                         }
235                         btmp[j] = tmp
236                 }
237         }
238 }
239
240 // Ssymm performs one of
241 //  C = alpha * A * B + beta * C, if side == blas.Left,
242 //  C = alpha * B * A + beta * C, if side == blas.Right,
243 // where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha
244 // is a scalar.
245 //
246 // Float32 implementations are autogenerated and not directly tested.
247 func (Implementation) Ssymm(s blas.Side, ul blas.Uplo, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) {
248         if s != blas.Right && s != blas.Left {
249                 panic("goblas: bad side")
250         }
251         if ul != blas.Lower && ul != blas.Upper {
252                 panic(badUplo)
253         }
254         if m < 0 {
255                 panic(mLT0)
256         }
257         if n < 0 {
258                 panic(nLT0)
259         }
260         var k int
261         if s == blas.Left {
262                 k = m
263         } else {
264                 k = n
265         }
266         if lda*(k-1)+k > len(a) || lda < max(1, k) {
267                 panic(badLdA)
268         }
269         if ldb*(m-1)+n > len(b) || ldb < max(1, n) {
270                 panic(badLdB)
271         }
272         if ldc*(m-1)+n > len(c) || ldc < max(1, n) {
273                 panic(badLdC)
274         }
275         if m == 0 || n == 0 {
276                 return
277         }
278         if alpha == 0 && beta == 1 {
279                 return
280         }
281         if alpha == 0 {
282                 if beta == 0 {
283                         for i := 0; i < m; i++ {
284                                 ctmp := c[i*ldc : i*ldc+n]
285                                 for j := range ctmp {
286                                         ctmp[j] = 0
287                                 }
288                         }
289                         return
290                 }
291                 for i := 0; i < m; i++ {
292                         ctmp := c[i*ldc : i*ldc+n]
293                         for j := 0; j < n; j++ {
294                                 ctmp[j] *= beta
295                         }
296                 }
297                 return
298         }
299
300         isUpper := ul == blas.Upper
301         if s == blas.Left {
302                 for i := 0; i < m; i++ {
303                         atmp := alpha * a[i*lda+i]
304                         btmp := b[i*ldb : i*ldb+n]
305                         ctmp := c[i*ldc : i*ldc+n]
306                         for j, v := range btmp {
307                                 ctmp[j] *= beta
308                                 ctmp[j] += atmp * v
309                         }
310
311                         for k := 0; k < i; k++ {
312                                 var atmp float32
313                                 if isUpper {
314                                         atmp = a[k*lda+i]
315                                 } else {
316                                         atmp = a[i*lda+k]
317                                 }
318                                 atmp *= alpha
319                                 ctmp := c[i*ldc : i*ldc+n]
320                                 f32.AxpyUnitaryTo(ctmp, atmp, b[k*ldb:k*ldb+n], ctmp)
321                         }
322                         for k := i + 1; k < m; k++ {
323                                 var atmp float32
324                                 if isUpper {
325                                         atmp = a[i*lda+k]
326                                 } else {
327                                         atmp = a[k*lda+i]
328                                 }
329                                 atmp *= alpha
330                                 ctmp := c[i*ldc : i*ldc+n]
331                                 f32.AxpyUnitaryTo(ctmp, atmp, b[k*ldb:k*ldb+n], ctmp)
332                         }
333                 }
334                 return
335         }
336         if isUpper {
337                 for i := 0; i < m; i++ {
338                         for j := n - 1; j >= 0; j-- {
339                                 tmp := alpha * b[i*ldb+j]
340                                 var tmp2 float32
341                                 atmp := a[j*lda+j+1 : j*lda+n]
342                                 btmp := b[i*ldb+j+1 : i*ldb+n]
343                                 ctmp := c[i*ldc+j+1 : i*ldc+n]
344                                 for k, v := range atmp {
345                                         ctmp[k] += tmp * v
346                                         tmp2 += btmp[k] * v
347                                 }
348                                 c[i*ldc+j] *= beta
349                                 c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2
350                         }
351                 }
352                 return
353         }
354         for i := 0; i < m; i++ {
355                 for j := 0; j < n; j++ {
356                         tmp := alpha * b[i*ldb+j]
357                         var tmp2 float32
358                         atmp := a[j*lda : j*lda+j]
359                         btmp := b[i*ldb : i*ldb+j]
360                         ctmp := c[i*ldc : i*ldc+j]
361                         for k, v := range atmp {
362                                 ctmp[k] += tmp * v
363                                 tmp2 += btmp[k] * v
364                         }
365                         c[i*ldc+j] *= beta
366                         c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2
367                 }
368         }
369 }
370
371 // Ssyrk performs the symmetric rank-k operation
372 //  C = alpha * A * A^T + beta*C
373 // C is an n×n symmetric matrix. A is an n×k matrix if tA == blas.NoTrans, and
374 // a k×n matrix otherwise. alpha and beta are scalars.
375 //
376 // Float32 implementations are autogenerated and not directly tested.
377 func (Implementation) Ssyrk(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float32, a []float32, lda int, beta float32, c []float32, ldc int) {
378         if ul != blas.Lower && ul != blas.Upper {
379                 panic(badUplo)
380         }
381         if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans {
382                 panic(badTranspose)
383         }
384         if n < 0 {
385                 panic(nLT0)
386         }
387         if k < 0 {
388                 panic(kLT0)
389         }
390         if ldc < n {
391                 panic(badLdC)
392         }
393         var row, col int
394         if tA == blas.NoTrans {
395                 row, col = n, k
396         } else {
397                 row, col = k, n
398         }
399         if lda*(row-1)+col > len(a) || lda < max(1, col) {
400                 panic(badLdA)
401         }
402         if ldc*(n-1)+n > len(c) || ldc < max(1, n) {
403                 panic(badLdC)
404         }
405         if alpha == 0 {
406                 if beta == 0 {
407                         if ul == blas.Upper {
408                                 for i := 0; i < n; i++ {
409                                         ctmp := c[i*ldc+i : i*ldc+n]
410                                         for j := range ctmp {
411                                                 ctmp[j] = 0
412                                         }
413                                 }
414                                 return
415                         }
416                         for i := 0; i < n; i++ {
417                                 ctmp := c[i*ldc : i*ldc+i+1]
418                                 for j := range ctmp {
419                                         ctmp[j] = 0
420                                 }
421                         }
422                         return
423                 }
424                 if ul == blas.Upper {
425                         for i := 0; i < n; i++ {
426                                 ctmp := c[i*ldc+i : i*ldc+n]
427                                 for j := range ctmp {
428                                         ctmp[j] *= beta
429                                 }
430                         }
431                         return
432                 }
433                 for i := 0; i < n; i++ {
434                         ctmp := c[i*ldc : i*ldc+i+1]
435                         for j := range ctmp {
436                                 ctmp[j] *= beta
437                         }
438                 }
439                 return
440         }
441         if tA == blas.NoTrans {
442                 if ul == blas.Upper {
443                         for i := 0; i < n; i++ {
444                                 ctmp := c[i*ldc+i : i*ldc+n]
445                                 atmp := a[i*lda : i*lda+k]
446                                 for jc, vc := range ctmp {
447                                         j := jc + i
448                                         ctmp[jc] = vc*beta + alpha*f32.DotUnitary(atmp, a[j*lda:j*lda+k])
449                                 }
450                         }
451                         return
452                 }
453                 for i := 0; i < n; i++ {
454                         atmp := a[i*lda : i*lda+k]
455                         for j, vc := range c[i*ldc : i*ldc+i+1] {
456                                 c[i*ldc+j] = vc*beta + alpha*f32.DotUnitary(a[j*lda:j*lda+k], atmp)
457                         }
458                 }
459                 return
460         }
461         // Cases where a is transposed.
462         if ul == blas.Upper {
463                 for i := 0; i < n; i++ {
464                         ctmp := c[i*ldc+i : i*ldc+n]
465                         if beta != 1 {
466                                 for j := range ctmp {
467                                         ctmp[j] *= beta
468                                 }
469                         }
470                         for l := 0; l < k; l++ {
471                                 tmp := alpha * a[l*lda+i]
472                                 if tmp != 0 {
473                                         f32.AxpyUnitaryTo(ctmp, tmp, a[l*lda+i:l*lda+n], ctmp)
474                                 }
475                         }
476                 }
477                 return
478         }
479         for i := 0; i < n; i++ {
480                 ctmp := c[i*ldc : i*ldc+i+1]
481                 if beta != 0 {
482                         for j := range ctmp {
483                                 ctmp[j] *= beta
484                         }
485                 }
486                 for l := 0; l < k; l++ {
487                         tmp := alpha * a[l*lda+i]
488                         if tmp != 0 {
489                                 f32.AxpyUnitaryTo(ctmp, tmp, a[l*lda:l*lda+i+1], ctmp)
490                         }
491                 }
492         }
493 }
494
495 // Ssyr2k performs the symmetric rank 2k operation
496 //  C = alpha * A * B^T + alpha * B * A^T + beta * C
497 // where C is an n×n symmetric matrix. A and B are n×k matrices if
498 // tA == NoTrans and k×n otherwise. alpha and beta are scalars.
499 //
500 // Float32 implementations are autogenerated and not directly tested.
501 func (Implementation) Ssyr2k(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) {
502         if ul != blas.Lower && ul != blas.Upper {
503                 panic(badUplo)
504         }
505         if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans {
506                 panic(badTranspose)
507         }
508         if n < 0 {
509                 panic(nLT0)
510         }
511         if k < 0 {
512                 panic(kLT0)
513         }
514         if ldc < n {
515                 panic(badLdC)
516         }
517         var row, col int
518         if tA == blas.NoTrans {
519                 row, col = n, k
520         } else {
521                 row, col = k, n
522         }
523         if lda*(row-1)+col > len(a) || lda < max(1, col) {
524                 panic(badLdA)
525         }
526         if ldb*(row-1)+col > len(b) || ldb < max(1, col) {
527                 panic(badLdB)
528         }
529         if ldc*(n-1)+n > len(c) || ldc < max(1, n) {
530                 panic(badLdC)
531         }
532         if alpha == 0 {
533                 if beta == 0 {
534                         if ul == blas.Upper {
535                                 for i := 0; i < n; i++ {
536                                         ctmp := c[i*ldc+i : i*ldc+n]
537                                         for j := range ctmp {
538                                                 ctmp[j] = 0
539                                         }
540                                 }
541                                 return
542                         }
543                         for i := 0; i < n; i++ {
544                                 ctmp := c[i*ldc : i*ldc+i+1]
545                                 for j := range ctmp {
546                                         ctmp[j] = 0
547                                 }
548                         }
549                         return
550                 }
551                 if ul == blas.Upper {
552                         for i := 0; i < n; i++ {
553                                 ctmp := c[i*ldc+i : i*ldc+n]
554                                 for j := range ctmp {
555                                         ctmp[j] *= beta
556                                 }
557                         }
558                         return
559                 }
560                 for i := 0; i < n; i++ {
561                         ctmp := c[i*ldc : i*ldc+i+1]
562                         for j := range ctmp {
563                                 ctmp[j] *= beta
564                         }
565                 }
566                 return
567         }
568         if tA == blas.NoTrans {
569                 if ul == blas.Upper {
570                         for i := 0; i < n; i++ {
571                                 atmp := a[i*lda : i*lda+k]
572                                 btmp := b[i*ldb : i*ldb+k]
573                                 ctmp := c[i*ldc+i : i*ldc+n]
574                                 for jc := range ctmp {
575                                         j := i + jc
576                                         var tmp1, tmp2 float32
577                                         binner := b[j*ldb : j*ldb+k]
578                                         for l, v := range a[j*lda : j*lda+k] {
579                                                 tmp1 += v * btmp[l]
580                                                 tmp2 += atmp[l] * binner[l]
581                                         }
582                                         ctmp[jc] *= beta
583                                         ctmp[jc] += alpha * (tmp1 + tmp2)
584                                 }
585                         }
586                         return
587                 }
588                 for i := 0; i < n; i++ {
589                         atmp := a[i*lda : i*lda+k]
590                         btmp := b[i*ldb : i*ldb+k]
591                         ctmp := c[i*ldc : i*ldc+i+1]
592                         for j := 0; j <= i; j++ {
593                                 var tmp1, tmp2 float32
594                                 binner := b[j*ldb : j*ldb+k]
595                                 for l, v := range a[j*lda : j*lda+k] {
596                                         tmp1 += v * btmp[l]
597                                         tmp2 += atmp[l] * binner[l]
598                                 }
599                                 ctmp[j] *= beta
600                                 ctmp[j] += alpha * (tmp1 + tmp2)
601                         }
602                 }
603                 return
604         }
605         if ul == blas.Upper {
606                 for i := 0; i < n; i++ {
607                         ctmp := c[i*ldc+i : i*ldc+n]
608                         if beta != 1 {
609                                 for j := range ctmp {
610                                         ctmp[j] *= beta
611                                 }
612                         }
613                         for l := 0; l < k; l++ {
614                                 tmp1 := alpha * b[l*lda+i]
615                                 tmp2 := alpha * a[l*lda+i]
616                                 btmp := b[l*ldb+i : l*ldb+n]
617                                 if tmp1 != 0 || tmp2 != 0 {
618                                         for j, v := range a[l*lda+i : l*lda+n] {
619                                                 ctmp[j] += v*tmp1 + btmp[j]*tmp2
620                                         }
621                                 }
622                         }
623                 }
624                 return
625         }
626         for i := 0; i < n; i++ {
627                 ctmp := c[i*ldc : i*ldc+i+1]
628                 if beta != 1 {
629                         for j := range ctmp {
630                                 ctmp[j] *= beta
631                         }
632                 }
633                 for l := 0; l < k; l++ {
634                         tmp1 := alpha * b[l*lda+i]
635                         tmp2 := alpha * a[l*lda+i]
636                         btmp := b[l*ldb : l*ldb+i+1]
637                         if tmp1 != 0 || tmp2 != 0 {
638                                 for j, v := range a[l*lda : l*lda+i+1] {
639                                         ctmp[j] += v*tmp1 + btmp[j]*tmp2
640                                 }
641                         }
642                 }
643         }
644 }
645
646 // Strmm performs
647 //  B = alpha * A * B,   if tA == blas.NoTrans and side == blas.Left,
648 //  B = alpha * A^T * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Left,
649 //  B = alpha * B * A,   if tA == blas.NoTrans and side == blas.Right,
650 //  B = alpha * B * A^T, if tA == blas.Trans or blas.ConjTrans, and side == blas.Right,
651 // where A is an n×n or m×m triangular matrix, and B is an m×n matrix.
652 //
653 // Float32 implementations are autogenerated and not directly tested.
654 func (Implementation) Strmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) {
655         if s != blas.Left && s != blas.Right {
656                 panic(badSide)
657         }
658         if ul != blas.Lower && ul != blas.Upper {
659                 panic(badUplo)
660         }
661         if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
662                 panic(badTranspose)
663         }
664         if d != blas.NonUnit && d != blas.Unit {
665                 panic(badDiag)
666         }
667         if m < 0 {
668                 panic(mLT0)
669         }
670         if n < 0 {
671                 panic(nLT0)
672         }
673         var k int
674         if s == blas.Left {
675                 k = m
676         } else {
677                 k = n
678         }
679         if lda*(k-1)+k > len(a) || lda < max(1, k) {
680                 panic(badLdA)
681         }
682         if ldb*(m-1)+n > len(b) || ldb < max(1, n) {
683                 panic(badLdB)
684         }
685         if alpha == 0 {
686                 for i := 0; i < m; i++ {
687                         btmp := b[i*ldb : i*ldb+n]
688                         for j := range btmp {
689                                 btmp[j] = 0
690                         }
691                 }
692                 return
693         }
694
695         nonUnit := d == blas.NonUnit
696         if s == blas.Left {
697                 if tA == blas.NoTrans {
698                         if ul == blas.Upper {
699                                 for i := 0; i < m; i++ {
700                                         tmp := alpha
701                                         if nonUnit {
702                                                 tmp *= a[i*lda+i]
703                                         }
704                                         btmp := b[i*ldb : i*ldb+n]
705                                         for j := range btmp {
706                                                 btmp[j] *= tmp
707                                         }
708                                         for ka, va := range a[i*lda+i+1 : i*lda+m] {
709                                                 k := ka + i + 1
710                                                 tmp := alpha * va
711                                                 if tmp != 0 {
712                                                         f32.AxpyUnitaryTo(btmp, tmp, b[k*ldb:k*ldb+n], btmp)
713                                                 }
714                                         }
715                                 }
716                                 return
717                         }
718                         for i := m - 1; i >= 0; i-- {
719                                 tmp := alpha
720                                 if nonUnit {
721                                         tmp *= a[i*lda+i]
722                                 }
723                                 btmp := b[i*ldb : i*ldb+n]
724                                 for j := range btmp {
725                                         btmp[j] *= tmp
726                                 }
727                                 for k, va := range a[i*lda : i*lda+i] {
728                                         tmp := alpha * va
729                                         if tmp != 0 {
730                                                 f32.AxpyUnitaryTo(btmp, tmp, b[k*ldb:k*ldb+n], btmp)
731                                         }
732                                 }
733                         }
734                         return
735                 }
736                 // Cases where a is transposed.
737                 if ul == blas.Upper {
738                         for k := m - 1; k >= 0; k-- {
739                                 btmpk := b[k*ldb : k*ldb+n]
740                                 for ia, va := range a[k*lda+k+1 : k*lda+m] {
741                                         i := ia + k + 1
742                                         btmp := b[i*ldb : i*ldb+n]
743                                         tmp := alpha * va
744                                         if tmp != 0 {
745                                                 f32.AxpyUnitaryTo(btmp, tmp, btmpk, btmp)
746                                         }
747                                 }
748                                 tmp := alpha
749                                 if nonUnit {
750                                         tmp *= a[k*lda+k]
751                                 }
752                                 if tmp != 1 {
753                                         for j := 0; j < n; j++ {
754                                                 btmpk[j] *= tmp
755                                         }
756                                 }
757                         }
758                         return
759                 }
760                 for k := 0; k < m; k++ {
761                         btmpk := b[k*ldb : k*ldb+n]
762                         for i, va := range a[k*lda : k*lda+k] {
763                                 btmp := b[i*ldb : i*ldb+n]
764                                 tmp := alpha * va
765                                 if tmp != 0 {
766                                         f32.AxpyUnitaryTo(btmp, tmp, btmpk, btmp)
767                                 }
768                         }
769                         tmp := alpha
770                         if nonUnit {
771                                 tmp *= a[k*lda+k]
772                         }
773                         if tmp != 1 {
774                                 for j := 0; j < n; j++ {
775                                         btmpk[j] *= tmp
776                                 }
777                         }
778                 }
779                 return
780         }
781         // Cases where a is on the right
782         if tA == blas.NoTrans {
783                 if ul == blas.Upper {
784                         for i := 0; i < m; i++ {
785                                 btmp := b[i*ldb : i*ldb+n]
786                                 for k := n - 1; k >= 0; k-- {
787                                         tmp := alpha * btmp[k]
788                                         if tmp != 0 {
789                                                 btmp[k] = tmp
790                                                 if nonUnit {
791                                                         btmp[k] *= a[k*lda+k]
792                                                 }
793                                                 for ja, v := range a[k*lda+k+1 : k*lda+n] {
794                                                         j := ja + k + 1
795                                                         btmp[j] += tmp * v
796                                                 }
797                                         }
798                                 }
799                         }
800                         return
801                 }
802                 for i := 0; i < m; i++ {
803                         btmp := b[i*ldb : i*ldb+n]
804                         for k := 0; k < n; k++ {
805                                 tmp := alpha * btmp[k]
806                                 if tmp != 0 {
807                                         btmp[k] = tmp
808                                         if nonUnit {
809                                                 btmp[k] *= a[k*lda+k]
810                                         }
811                                         f32.AxpyUnitaryTo(btmp, tmp, a[k*lda:k*lda+k], btmp)
812                                 }
813                         }
814                 }
815                 return
816         }
817         // Cases where a is transposed.
818         if ul == blas.Upper {
819                 for i := 0; i < m; i++ {
820                         btmp := b[i*ldb : i*ldb+n]
821                         for j, vb := range btmp {
822                                 tmp := vb
823                                 if nonUnit {
824                                         tmp *= a[j*lda+j]
825                                 }
826                                 tmp += f32.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:n])
827                                 btmp[j] = alpha * tmp
828                         }
829                 }
830                 return
831         }
832         for i := 0; i < m; i++ {
833                 btmp := b[i*ldb : i*ldb+n]
834                 for j := n - 1; j >= 0; j-- {
835                         tmp := btmp[j]
836                         if nonUnit {
837                                 tmp *= a[j*lda+j]
838                         }
839                         tmp += f32.DotUnitary(a[j*lda:j*lda+j], btmp[:j])
840                         btmp[j] = alpha * tmp
841                 }
842         }
843 }