1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // +build amd64,!gccgo,!appengine
// ·iv0..·iv3 are four 16-byte constants, each holding two 64-bit words.
// The "<>" suffix makes each symbol local to this file; NOPTR+RODATA marks
// the data as pointer-free and read-only.
// Taken in order, the eight words equal the BLAKE2b initialization vector
// IV[0..7] from RFC 7693 (the same values as the SHA-512 initial state).
9 DATA ·iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
10 DATA ·iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
11 GLOBL ·iv0<>(SB), (NOPTR+RODATA), $16
13 DATA ·iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b
14 DATA ·iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1
15 GLOBL ·iv1<>(SB), (NOPTR+RODATA), $16
17 DATA ·iv2<>+0x00(SB)/8, $0x510e527fade682d1
18 DATA ·iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
19 GLOBL ·iv2<>(SB), (NOPTR+RODATA), $16
21 DATA ·iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b
22 DATA ·iv3<>+0x08(SB)/8, $0x5be0cd19137e2179
23 GLOBL ·iv3<>(SB), (NOPTR+RODATA), $16
// ·c40 and ·c48 are file-local, read-only 16-byte byte-index tables.
// Read as little-endian byte sequences they are:
//   ·c40: 3,4,5,6,7,0,1,2 | 11,12,13,14,15,8,9,10
//   ·c48: 2,3,4,5,6,7,0,1 | 10,11,12,13,14,15,8,9
// Used as a byte-shuffle control mask (dst[i] = src[mask[i]]), ·c40 rotates
// each 64-bit lane right by 24 bits (left by 40) and ·c48 rotates each lane
// right by 16 bits (left by 48).
// NOTE(review): the shuffle instruction consuming these masks (presumably
// PSHUFB inside HALF_ROUND) is not visible in this listing; confirm.
25 DATA ·c40<>+0x00(SB)/8, $0x0201000706050403
26 DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
27 GLOBL ·c40<>(SB), (NOPTR+RODATA), $16
29 DATA ·c48<>+0x00(SB)/8, $0x0100070605040302
30 DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
31 GLOBL ·c48<>(SB), (NOPTR+RODATA), $16
// SHUFFLE, SHUFFLE_INV and HALF_ROUND are multi-line macros. Only their
// headers and two HALF_ROUND instructions appear in this listing.
//   SHUFFLE / SHUFFLE_INV: six vector operands (v2..v7) plus two operands
//     t1, t2 (presumably temporaries; call sites pass X8 and X9).
//   HALF_ROUND: eight state operands (v0..v7), four message operands
//     (m0..m3), one operand t0 (presumably a temporary) and the two
//     byte-shuffle masks (c40, c48).
// The two visible PSHUFD $0xB1 steps swap the 32-bit halves of every 64-bit
// lane of v6 and v7, i.e. they rotate each 64-bit lane by 32 bits.
// NOTE(review): the remaining macro bodies are not visible here. Going by
// the names, SHUFFLE_INV presumably undoes SHUFFLE — confirm against the
// full file.
33 #define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \
50 #define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \
67 #define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \
74 PSHUFD $0xB1, v6, v6; \
75 PSHUFD $0xB1, v7, v7; \
// LOAD_MSG gathers eight 64-bit words from the buffer addressed by blk into
// four 128-bit destinations xa..xd. For destination k, the word at byte
// offset lok*8 is moved in with MOVQ (low lane) and the word at byte offset
// hik*8 is inserted into lane 1 (high lane) with PINSRQ.
// The index arguments are expected to be constants so that "idx*8(blk)"
// assembles to a fixed displacement.
103 #define LOAD_MSG(xa, xb, xc, xd, blk, lo0, hi0, lo1, hi1, lo2, hi2, lo3, hi3) \
104 MOVQ lo0*8(blk), xa; \
105 PINSRQ $1, hi0*8(blk), xa; \
106 MOVQ lo1*8(blk), xb; \
107 PINSRQ $1, hi1*8(blk), xb; \
108 MOVQ lo2*8(blk), xc; \
109 PINSRQ $1, hi2*8(blk), xc; \
110 MOVQ lo3*8(blk), xd; \
111 PINSRQ $1, hi3*8(blk), xd
113 // func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
// Text flag 4 is NOSPLIT (Go textflag.h). The local frame is 288 bytes and
// the arguments occupy 48 bytes: two pointers, one uint64 and a slice header.
//
// Register / stack roles that are visible in this listing:
//   SI, DI       base pointer and length of blocks
//   X0..X7       working state handed to every HALF_ROUND
//   X8..X11      message words gathered by LOAD_MSG; X8 and X9 are also
//                passed as the SHUFFLE/SHUFFLE_INV t1, t2 operands, and X11
//                is passed as both m3 and t0 of HALF_ROUND
//   X13, X14     the ·c40 and ·c48 byte-shuffle masks
//   0(SP)        ·iv3 with CX XOR-ed into its low 64 bits
//   16..256(SP)  sixteen 16-byte slots used as message operands by the
//                last two rounds
//
// Each "round" below is a pair of HALF_ROUNDs, the first followed by SHUFFLE
// and the second by SHUFFLE_INV. Twelve rounds are visible, which matches
// the BLAKE2b round count in RFC 7693.
// NOTE(review): this listing omits lines (argument loads for h/c/flag, stack
// alignment, state setup, the stores into 16..256(SP), loop control, the
// final state update and RET); confirm details against the full file.
114 TEXT ·hashBlocksSSE4(SB), 4, $288-48 // frame size = 272 + 16 byte alignment
// Load the slice header fields of blocks: data pointer and byte length.
118 MOVQ blocks_base+24(FP), SI
119 MOVQ blocks_len+32(FP), DI
// NOTE(review): CX presumably holds the flag argument; its load is not visible here.
129 XORQ CX, 0(SP) // 0(SP) = ·iv3 ^ (CX || 0)
// Keep both byte-shuffle masks resident in registers for all HALF_ROUNDs.
131 MOVOU ·c40<>(SB), X13
132 MOVOU ·c48<>(SB), X14
// Round 1
161 LOAD_MSG(X8, X9, X10, X11, SI, 0, 2, 4, 6, 1, 3, 5, 7)
166 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
167 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
168 LOAD_MSG(X8, X9, X10, X11, SI, 8, 10, 12, 14, 9, 11, 13, 15)
173 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
174 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
// Round 2
176 LOAD_MSG(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6)
181 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
182 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
183 LOAD_MSG(X8, X9, X10, X11, SI, 1, 0, 11, 5, 12, 2, 7, 3)
188 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
189 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
// Round 3
191 LOAD_MSG(X8, X9, X10, X11, SI, 11, 12, 5, 15, 8, 0, 2, 13)
192 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
193 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
194 LOAD_MSG(X8, X9, X10, X11, SI, 10, 3, 7, 9, 14, 6, 1, 4)
195 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
196 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
// Round 4
198 LOAD_MSG(X8, X9, X10, X11, SI, 7, 3, 13, 11, 9, 1, 12, 14)
199 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
200 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
201 LOAD_MSG(X8, X9, X10, X11, SI, 2, 5, 4, 15, 6, 10, 0, 8)
202 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
203 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
// Round 5
205 LOAD_MSG(X8, X9, X10, X11, SI, 9, 5, 2, 10, 0, 7, 4, 15)
206 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
207 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
208 LOAD_MSG(X8, X9, X10, X11, SI, 14, 11, 6, 3, 1, 12, 8, 13)
209 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
210 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
// Round 6
212 LOAD_MSG(X8, X9, X10, X11, SI, 2, 6, 0, 8, 12, 10, 11, 3)
213 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
214 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
215 LOAD_MSG(X8, X9, X10, X11, SI, 4, 7, 15, 1, 13, 5, 14, 9)
216 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
217 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
// Round 7
219 LOAD_MSG(X8, X9, X10, X11, SI, 12, 1, 14, 4, 5, 15, 13, 10)
220 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
221 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
222 LOAD_MSG(X8, X9, X10, X11, SI, 0, 6, 9, 8, 7, 3, 2, 11)
223 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
224 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
// Round 8
226 LOAD_MSG(X8, X9, X10, X11, SI, 13, 7, 12, 3, 11, 14, 1, 9)
227 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
228 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
229 LOAD_MSG(X8, X9, X10, X11, SI, 5, 15, 8, 2, 0, 4, 6, 10)
230 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
231 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
// Round 9
233 LOAD_MSG(X8, X9, X10, X11, SI, 6, 14, 11, 0, 15, 9, 3, 8)
234 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
235 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
236 LOAD_MSG(X8, X9, X10, X11, SI, 12, 13, 1, 10, 2, 7, 4, 5)
237 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
238 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
// Round 10
240 LOAD_MSG(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5)
241 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
242 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
243 LOAD_MSG(X8, X9, X10, X11, SI, 15, 9, 3, 13, 11, 14, 12, 0)
244 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
245 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
// Rounds 11 and 12 read their message operands from stack slots instead of
// calling LOAD_MSG.
// NOTE(review): presumably these slots hold the message vectors saved during
// rounds 1 and 2 (the stores are not visible in this listing) — confirm.
// Round 11
247 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 16(SP), 32(SP), 48(SP), 64(SP), X11, X13, X14)
248 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
249 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 80(SP), 96(SP), 112(SP), 128(SP), X11, X13, X14)
250 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
// Round 12
252 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 144(SP), 160(SP), 176(SP), 192(SP), X11, X13, X14)
253 SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
254 HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 208(SP), 224(SP), 240(SP), 256(SP), X11, X13, X14)
255 SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
283 // func supportsSSE4() bool
// Text flag 4 is NOSPLIT (Go textflag.h); $0-1 declares no local frame and
// one byte of arguments/results, i.e. the bool result.
// The two visible instructions isolate bit 19 of CX as a 0/1 value.
// NOTE(review): the instructions before SHRL (presumably a CPUID leaf-1
// query that leaves the feature flags in CX) and after ANDL (storing the
// result and RET) are not visible in this listing; confirm.
284 TEXT ·supportsSSE4(SB), 4, $0-1
287 SHRL $19, CX // Move bit 19 to bit 0; in CPUID leaf 1 ECX, bit 19 is the SSE4.1 flag
288 ANDL $1, CX // CX = 1 if SSE4.1 is supported, 0 otherwise