// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//+build !noasm,!appengine

#include "textflag.h"

// Register roles.
//
// SRC and DST are deliberately the same register: the scaling is done in
// place, so the read cursor and the write cursor are one pointer.
#define SRC SI
#define DST SI
#define LEN CX
#define TAIL BX
#define INC R9
#define INC3 R10
#define ALPHA X0
#define ALPHA_2 X1

// Hand-encoded instruction that loads the float64 at 8(SP) into both
// 64-bit lanes of X0. alpha is the first argument and the frame size is
// zero, so 8(SP) is where alpha lives on entry.
#define MOVDDUP_ALPHA    LONG $0x44120FF2; WORD $0x0824 // MOVDDUP 8(SP), X0

// func DscalInc(alpha float64, x []complex128, n, inc uintptr)
//
// DscalInc multiplies n complex128 elements of x, taken every inc
// elements starting at x[0], by the real scalar alpha, in place.
// Each element is one 16-byte vector {real, imag}; a single packed
// multiply against {alpha, alpha} scales both parts at once.
// The length of x is not consulted; only x_base, n and inc are read.
TEXT ·DscalInc(SB), NOSPLIT, $0
	MOVQ  x_base+8(FP), SRC // cursor = &x[0]
	MOVQ  n+32(FP), LEN     // remaining = n
	TESTQ LEN, LEN          // nothing to scale when n == 0
	JZ    done

	MOVDDUP_ALPHA             // ALPHA = { alpha, alpha }
	MOVQ   inc+40(FP), INC    // stride, in elements
	SHLQ   $4, INC            // stride in bytes (16 bytes per complex128)
	LEAQ   (INC)(INC*2), INC3 // three strides, for the fourth unrolled lane
	MOVUPS ALPHA, ALPHA_2     // second copy of the multiplier for pipelining
	MOVQ   LEN, TAIL          // keep n; its low two bits give the remainder
	SHRQ   $2, LEN            // number of four-element groups
	JZ     remainder          // fewer than four elements: tail only

unrolled: // for each group of four strided elements {
	MOVUPS (SRC), X2        // fetch the four elements
	MOVUPS (SRC)(INC*1), X3
	MOVUPS (SRC)(INC*2), X4
	MOVUPS (SRC)(INC3*1), X5

	MULPD ALPHA, X2   // scale real and imaginary parts together
	MULPD ALPHA_2, X3
	MULPD ALPHA, X4
	MULPD ALPHA_2, X5

	MOVUPS X2, (DST)        // write the results back over the inputs
	MOVUPS X3, (DST)(INC*1)
	MOVUPS X4, (DST)(INC*2)
	MOVUPS X5, (DST)(INC3*1)

	LEAQ (SRC)(INC*4), SRC // advance the cursor by four strides
	DECQ LEN
	JNZ  unrolled          // } until every group is done

remainder:
	ANDQ $3, TAIL // leftover = n % 4
	JZ   done     // n was a multiple of four

remainder_loop: // for each leftover element {
	MOVUPS (SRC), X2 // fetch
	MULPD  ALPHA, X2 // scale
	MOVUPS X2, (DST) // store in place
	ADDQ   INC, SRC  // advance the cursor by one stride
	DECQ   TAIL
	JNZ    remainder_loop // } until no leftovers remain

done:
	RET