1 // Copyright ©2016 The Gonum Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // +build !noasm,!appengine
9 // func L1NormInc(x []float64, n, incX int) (sum float64)
10 TEXT ·L1NormInc(SB), NOSPLIT, $0
11 MOVQ x_base+0(FP), SI // SI = &x
12 MOVQ n+24(FP), CX // CX = n
13 MOVQ incX+32(FP), AX // AX = increment * sizeof( float64 )
15 MOVQ AX, DX // DX = AX * 3
17 PXOR X0, X0 // p_sum_i = 0
25 CMPQ CX, $0 // if CX == 0 { return 0 }
28 ANDQ $7, BX // BX = n % 8
29 SHRQ $3, CX // CX = floor( n / 8 )
30 JZ absum_tail_start // if CX == 0 { goto absum_tail_start }
33 // p_sum = max( p_sum + x[i], p_sum - x[i] )
34 MOVSD (SI), X8 // X_i[0] = x[i]
38 LEAQ (SI)(AX*4), SI // SI = SI + 4
39 MOVHPD (SI), X8 // X_i[1] = x[i+4]
41 MOVHPD (SI)(AX*2), X10
42 MOVHPD (SI)(DX*1), X11
43 ADDPD X8, X0 // p_sum_i += X_i ( positive values )
47 SUBPD X8, X1 // p_sum_(i+1) -= X_i ( negative values )
51 MAXPD X1, X0 // p_sum_i = max( p_sum_i, p_sum_(i+1) )
55 MOVAPS X0, X1 // p_sum_(i+1) = p_sum_i
59 LEAQ (SI)(AX*4), SI // SI = SI + 4
60 LOOP absum_loop // } while --CX > 0
62 // p_sum_0 = \sum_{i=1}^{3}( p_sum_(i*2) )
67 // p_sum_0[0] = p_sum_0[0] + p_sum_0[1]
69 SHUFPD $0x3, X0, X0 // lower( p_sum_0 ) = upper( p_sum_0 )
72 JE absum_end // if BX == 0 { goto absum_end }
74 absum_tail_start: // Reset loop registers
75 MOVQ BX, CX // Loop counter: CX = BX
76 XORPS X8, X8 // X_8 = 0
79 // p_sum = max( p_sum + x[i], p_sum - x[i] )
80 MOVSD (SI), X8 // X_8 = x[i]
81 MOVSD X0, X1 // p_sum_1 = p_sum_0
82 ADDSD X8, X0 // p_sum_0 += X_8
83 SUBSD X8, X1 // p_sum_1 -= X_8
84 MAXSD X1, X0 // p_sum_0 = max( p_sum_0, p_sum_1 )
86 LOOP absum_tail // } while --CX > 0
88 absum_end: // return p_sum_0