1 // Copyright ©2017 The Gonum Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 //+build !noasm,!appengine
// MOVDDUP_ALPHA hand-encodes one instruction as raw bytes: the LONG and WORD
// directives emit F2 0F 12 44 24 08, i.e. MOVDDUP 8(SP), X0 (SP being the
// hardware stack pointer). MOVDDUP loads one float64 and copies it into both
// 64-bit lanes of X0.
// NOTE(review): 8(SP) is presumably the alpha argument of the calling routine
// (first argument, just above the return address when the frame size is $0),
// and the raw encoding presumably exists because the assembler did not accept
// the MOVDDUP mnemonic — confirm both.
18 #define MOVDDUP_ALPHA LONG $0x44120FF2; WORD $0x0824 // MOVDDUP 8(SP), X0
// DscalInc multiplies complex128 elements of x by the real scalar alpha,
// walking x with a stride of inc elements (inc*16 bytes). The main loop
// handles four elements per pass; the tail loop handles the remaining n%4.
//
//   alpha  multiplier; MOVDDUP_ALPHA loads it into ALPHA
//   x      slice; only its data pointer (x_base) is read here
//   n      element count: n/4 drives the unrolled loop, n%4 the tail loop
//   inc    stride between consecutive elements, in units of complex128
//
// SRC, DST, LEN, TAIL, INC, INC3, ALPHA and ALPHA_2 are not defined in the
// visible lines; presumably they are #define'd register aliases — confirm.
// NOTE(review): DST is never loaded in the visible lines, so it presumably
// names the same register as SRC (making the update in place) — confirm.
// NOTE(review): the jump targets dscal_loop, dscal_tail and dscal_end, the
// LEN/TAIL decrements implied by the "--LEN"/"--TAIL" comments, the scaling
// of X3-X5 and a RET are not visible in this view — confirm against the
// complete file before changing any instruction here.
20 // func DscalInc(alpha float64, x []complex128, n, inc uintptr)
21 TEXT ·DscalInc(SB), NOSPLIT, $0
22 MOVQ x_base+8(FP), SRC // SRC = &x[0] (data pointer of the slice)
23 MOVQ n+32(FP), LEN // LEN = n
24 CMPQ LEN, $0 // if LEN == 0 { return } (the branch using this compare is not visible here)
27 MOVDDUP_ALPHA // ALPHA = alpha, duplicated into both 64-bit lanes
28 MOVQ inc+40(FP), INC // INC = inc (stride in elements)
29 SHLQ $4, INC // INC = INC * sizeof(complex128): stride in bytes (16 per element)
30 LEAQ (INC)(INC*2), INC3 // INC3 = 3 * INC, byte offset of the 4th element in a group
31 MOVUPS ALPHA, ALPHA_2 // ALPHA_2 = ALPHA: second copy of the multiplier, for pipelining
32 MOVQ LEN, TAIL // TAIL = LEN (keep n for the remainder computation)
33 SHRQ $2, LEN // LEN = floor( n / 4 ); sets ZF when the result is zero
34 JZ dscal_tail // if LEN == 0 { goto dscal_tail }
37 MOVUPS (SRC), X2 // X_i = x[i]: load four strided elements, one per register
38 MOVUPS (SRC)(INC*1), X3
39 MOVUPS (SRC)(INC*2), X4
40 MOVUPS (SRC)(INC3*1), X5
42 MULPD ALPHA, X2 // X_i *= ALPHA (both float64 lanes, i.e. real and imaginary parts)
47 MOVUPS X2, (DST) // x[i] = X_i: store the four elements back at the same offsets
48 MOVUPS X3, (DST)(INC*1)
49 MOVUPS X4, (DST)(INC*2)
50 MOVUPS X5, (DST)(INC3*1)
52 LEAQ (SRC)(INC*4), SRC // SRC += INC*4 (LEAQ leaves the flags untouched)
54 JNZ dscal_loop // } while --LEN > 0
57 ANDQ $3, TAIL // TAIL = TAIL % 4; sets ZF when no elements remain
58 JE dscal_end // if TAIL == 0 { return }
60 dscal_tail_loop: // do {
61 MOVUPS (SRC), X2 // X_i = x[i]
62 MULPD ALPHA, X2 // X_i *= ALPHA
63 MOVUPS X2, (DST) // x[i] = X_i
64 ADDQ INC, SRC // SRC += INC (advance by one strided element)
66 JNZ dscal_tail_loop // } while --TAIL > 0