1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=KNL
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefix=SKX
; All-lanes-negative test, <16 x i8>: the per-lane sign compare is bitcast to an
; i16 mask and compared against -1 ("movmsk == all ones").
; Expected lowering: pmovmskb + cmpw $-1 (SSE2/AVX/KNL); mask register
; vpmovb2m + kortestw on SKX (avx512vl/dq/bw).
8 define i1 @allones_v16i8_sign(<16 x i8> %arg) {
9 ; SSE2-LABEL: allones_v16i8_sign:
11 ; SSE2-NEXT: pmovmskb %xmm0, %eax
12 ; SSE2-NEXT: cmpw $-1, %ax
16 ; AVX-LABEL: allones_v16i8_sign:
18 ; AVX-NEXT: vpmovmskb %xmm0, %eax
19 ; AVX-NEXT: cmpw $-1, %ax
23 ; KNL-LABEL: allones_v16i8_sign:
25 ; KNL-NEXT: vpmovmskb %xmm0, %eax
26 ; KNL-NEXT: cmpw $-1, %ax
30 ; SKX-LABEL: allones_v16i8_sign:
32 ; SKX-NEXT: vpmovb2m %xmm0, %k0
33 ; SKX-NEXT: kortestw %k0, %k0
36 %tmp = icmp slt <16 x i8> %arg, zeroinitializer
37 %tmp1 = bitcast <16 x i1> %tmp to i16
38 %tmp2 = icmp eq i16 %tmp1, -1
; No-lane-negative test, <16 x i8>: sign-bit mask (i16) compared against 0.
; Expected lowering: pmovmskb + testw (SSE2/AVX/KNL); vpmovb2m + kortestw (SKX).
42 define i1 @allzeros_v16i8_sign(<16 x i8> %arg) {
43 ; SSE2-LABEL: allzeros_v16i8_sign:
45 ; SSE2-NEXT: pmovmskb %xmm0, %eax
46 ; SSE2-NEXT: testw %ax, %ax
50 ; AVX-LABEL: allzeros_v16i8_sign:
52 ; AVX-NEXT: vpmovmskb %xmm0, %eax
53 ; AVX-NEXT: testw %ax, %ax
57 ; KNL-LABEL: allzeros_v16i8_sign:
59 ; KNL-NEXT: vpmovmskb %xmm0, %eax
60 ; KNL-NEXT: testw %ax, %ax
64 ; SKX-LABEL: allzeros_v16i8_sign:
66 ; SKX-NEXT: vpmovb2m %xmm0, %k0
67 ; SKX-NEXT: kortestw %k0, %k0
70 %tmp = icmp slt <16 x i8> %arg, zeroinitializer
71 %tmp1 = bitcast <16 x i1> %tmp to i16
72 %tmp2 = icmp eq i16 %tmp1, 0
; All-lanes-negative test, <32 x i8> (i32 mask == -1).
; SSE2/AVX1 combine two 16-bit pmovmskb results via shll $16 + orl;
; AVX2 gets a single 32-bit ymm vpmovmskb; SKX uses vpmovb2m + kortestd.
76 define i1 @allones_v32i8_sign(<32 x i8> %arg) {
77 ; SSE2-LABEL: allones_v32i8_sign:
79 ; SSE2-NEXT: pmovmskb %xmm0, %eax
80 ; SSE2-NEXT: pmovmskb %xmm1, %ecx
81 ; SSE2-NEXT: shll $16, %ecx
82 ; SSE2-NEXT: orl %eax, %ecx
83 ; SSE2-NEXT: cmpl $-1, %ecx
87 ; AVX1-LABEL: allones_v32i8_sign:
89 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
90 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
91 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
92 ; AVX1-NEXT: shll $16, %ecx
93 ; AVX1-NEXT: orl %eax, %ecx
94 ; AVX1-NEXT: cmpl $-1, %ecx
96 ; AVX1-NEXT: vzeroupper
99 ; AVX2-LABEL: allones_v32i8_sign:
101 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
102 ; AVX2-NEXT: cmpl $-1, %eax
103 ; AVX2-NEXT: sete %al
104 ; AVX2-NEXT: vzeroupper
107 ; KNL-LABEL: allones_v32i8_sign:
109 ; KNL-NEXT: vpmovmskb %ymm0, %eax
110 ; KNL-NEXT: cmpl $-1, %eax
112 ; KNL-NEXT: vzeroupper
115 ; SKX-LABEL: allones_v32i8_sign:
117 ; SKX-NEXT: vpmovb2m %ymm0, %k0
118 ; SKX-NEXT: kortestd %k0, %k0
120 ; SKX-NEXT: vzeroupper
122 %tmp = icmp slt <32 x i8> %arg, zeroinitializer
123 %tmp1 = bitcast <32 x i1> %tmp to i32
124 %tmp2 = icmp eq i32 %tmp1, -1
; No-lane-negative test, <32 x i8> (i32 mask == 0).
; Same mask construction as allones_v32i8_sign, but the combined mask is
; checked for zero (orl sets ZF; AVX2 uses testl).
128 define i1 @allzeros_v32i8_sign(<32 x i8> %arg) {
129 ; SSE2-LABEL: allzeros_v32i8_sign:
131 ; SSE2-NEXT: pmovmskb %xmm0, %eax
132 ; SSE2-NEXT: pmovmskb %xmm1, %ecx
133 ; SSE2-NEXT: shll $16, %ecx
134 ; SSE2-NEXT: orl %eax, %ecx
135 ; SSE2-NEXT: sete %al
138 ; AVX1-LABEL: allzeros_v32i8_sign:
140 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
141 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
142 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
143 ; AVX1-NEXT: shll $16, %ecx
144 ; AVX1-NEXT: orl %eax, %ecx
145 ; AVX1-NEXT: sete %al
146 ; AVX1-NEXT: vzeroupper
149 ; AVX2-LABEL: allzeros_v32i8_sign:
151 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
152 ; AVX2-NEXT: testl %eax, %eax
153 ; AVX2-NEXT: sete %al
154 ; AVX2-NEXT: vzeroupper
157 ; KNL-LABEL: allzeros_v32i8_sign:
159 ; KNL-NEXT: vpmovmskb %ymm0, %eax
160 ; KNL-NEXT: testl %eax, %eax
162 ; KNL-NEXT: vzeroupper
165 ; SKX-LABEL: allzeros_v32i8_sign:
167 ; SKX-NEXT: vpmovb2m %ymm0, %k0
168 ; SKX-NEXT: kortestd %k0, %k0
170 ; SKX-NEXT: vzeroupper
172 %tmp = icmp slt <32 x i8> %arg, zeroinitializer
173 %tmp1 = bitcast <32 x i1> %tmp to i32
174 %tmp2 = icmp eq i32 %tmp1, 0
; All-lanes-negative test, <64 x i8> (i64 mask == -1).
; SSE2/AVX1 stitch four 16-bit masks into a 64-bit value (shll/orl then
; shlq $32/orq); AVX2 combines two 32-bit ymm masks; SKX uses the full
; 64-bit zmm mask path: vpmovb2m + kortestq.
178 define i1 @allones_v64i8_sign(<64 x i8> %arg) {
179 ; SSE2-LABEL: allones_v64i8_sign:
181 ; SSE2-NEXT: pmovmskb %xmm0, %eax
182 ; SSE2-NEXT: pmovmskb %xmm1, %ecx
183 ; SSE2-NEXT: shll $16, %ecx
184 ; SSE2-NEXT: orl %eax, %ecx
185 ; SSE2-NEXT: pmovmskb %xmm2, %eax
186 ; SSE2-NEXT: pmovmskb %xmm3, %edx
187 ; SSE2-NEXT: shll $16, %edx
188 ; SSE2-NEXT: orl %eax, %edx
189 ; SSE2-NEXT: shlq $32, %rdx
190 ; SSE2-NEXT: orq %rcx, %rdx
191 ; SSE2-NEXT: cmpq $-1, %rdx
192 ; SSE2-NEXT: sete %al
195 ; AVX1-LABEL: allones_v64i8_sign:
197 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
198 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
199 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
200 ; AVX1-NEXT: shll $16, %ecx
201 ; AVX1-NEXT: orl %eax, %ecx
202 ; AVX1-NEXT: vpmovmskb %xmm1, %eax
203 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
204 ; AVX1-NEXT: vpmovmskb %xmm0, %edx
205 ; AVX1-NEXT: shll $16, %edx
206 ; AVX1-NEXT: orl %eax, %edx
207 ; AVX1-NEXT: shlq $32, %rdx
208 ; AVX1-NEXT: orq %rcx, %rdx
209 ; AVX1-NEXT: cmpq $-1, %rdx
210 ; AVX1-NEXT: sete %al
211 ; AVX1-NEXT: vzeroupper
214 ; AVX2-LABEL: allones_v64i8_sign:
216 ; AVX2-NEXT: vpmovmskb %ymm1, %eax
217 ; AVX2-NEXT: shlq $32, %rax
218 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
219 ; AVX2-NEXT: orq %rax, %rcx
220 ; AVX2-NEXT: cmpq $-1, %rcx
221 ; AVX2-NEXT: sete %al
222 ; AVX2-NEXT: vzeroupper
225 ; KNL-LABEL: allones_v64i8_sign:
227 ; KNL-NEXT: vpmovmskb %ymm1, %eax
228 ; KNL-NEXT: shlq $32, %rax
229 ; KNL-NEXT: vpmovmskb %ymm0, %ecx
230 ; KNL-NEXT: orq %rax, %rcx
231 ; KNL-NEXT: cmpq $-1, %rcx
233 ; KNL-NEXT: vzeroupper
236 ; SKX-LABEL: allones_v64i8_sign:
238 ; SKX-NEXT: vpmovb2m %zmm0, %k0
239 ; SKX-NEXT: kortestq %k0, %k0
241 ; SKX-NEXT: vzeroupper
243 %tmp = icmp slt <64 x i8> %arg, zeroinitializer
244 %tmp1 = bitcast <64 x i1> %tmp to i64
245 %tmp2 = icmp eq i64 %tmp1, -1
; No-lane-negative test, <64 x i8> (i64 mask == 0).
; Same 64-bit mask stitching as allones_v64i8_sign; the final orq sets ZF
; directly so no explicit compare is needed. SKX: vpmovb2m + kortestq.
249 define i1 @allzeros_v64i8_sign(<64 x i8> %arg) {
250 ; SSE2-LABEL: allzeros_v64i8_sign:
252 ; SSE2-NEXT: pmovmskb %xmm0, %eax
253 ; SSE2-NEXT: pmovmskb %xmm1, %ecx
254 ; SSE2-NEXT: shll $16, %ecx
255 ; SSE2-NEXT: orl %eax, %ecx
256 ; SSE2-NEXT: pmovmskb %xmm2, %eax
257 ; SSE2-NEXT: pmovmskb %xmm3, %edx
258 ; SSE2-NEXT: shll $16, %edx
259 ; SSE2-NEXT: orl %eax, %edx
260 ; SSE2-NEXT: shlq $32, %rdx
261 ; SSE2-NEXT: orq %rcx, %rdx
262 ; SSE2-NEXT: sete %al
265 ; AVX1-LABEL: allzeros_v64i8_sign:
267 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
268 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
269 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
270 ; AVX1-NEXT: shll $16, %ecx
271 ; AVX1-NEXT: orl %eax, %ecx
272 ; AVX1-NEXT: vpmovmskb %xmm1, %eax
273 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
274 ; AVX1-NEXT: vpmovmskb %xmm0, %edx
275 ; AVX1-NEXT: shll $16, %edx
276 ; AVX1-NEXT: orl %eax, %edx
277 ; AVX1-NEXT: shlq $32, %rdx
278 ; AVX1-NEXT: orq %rcx, %rdx
279 ; AVX1-NEXT: sete %al
280 ; AVX1-NEXT: vzeroupper
283 ; AVX2-LABEL: allzeros_v64i8_sign:
285 ; AVX2-NEXT: vpmovmskb %ymm1, %eax
286 ; AVX2-NEXT: shlq $32, %rax
287 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
288 ; AVX2-NEXT: orq %rax, %rcx
289 ; AVX2-NEXT: sete %al
290 ; AVX2-NEXT: vzeroupper
293 ; KNL-LABEL: allzeros_v64i8_sign:
295 ; KNL-NEXT: vpmovmskb %ymm1, %eax
296 ; KNL-NEXT: shlq $32, %rax
297 ; KNL-NEXT: vpmovmskb %ymm0, %ecx
298 ; KNL-NEXT: orq %rax, %rcx
300 ; KNL-NEXT: vzeroupper
303 ; SKX-LABEL: allzeros_v64i8_sign:
305 ; SKX-NEXT: vpmovb2m %zmm0, %k0
306 ; SKX-NEXT: kortestq %k0, %k0
308 ; SKX-NEXT: vzeroupper
310 %tmp = icmp slt <64 x i8> %arg, zeroinitializer
311 %tmp1 = bitcast <64 x i1> %tmp to i64
312 %tmp2 = icmp eq i64 %tmp1, 0
; All-lanes-negative test, <8 x i16> (i8 mask == -1).
; SSE2/AVX narrow i16 lanes via packsswb so pmovmskb yields an 8-bit mask;
; KNL (no avx512bw/vl word masks) sign-extends the compare to zmm and uses
; vptestmq; SKX uses vpmovw2m + kortestb.
316 define i1 @allones_v8i16_sign(<8 x i16> %arg) {
317 ; SSE2-LABEL: allones_v8i16_sign:
319 ; SSE2-NEXT: packsswb %xmm0, %xmm0
320 ; SSE2-NEXT: pmovmskb %xmm0, %eax
321 ; SSE2-NEXT: cmpb $-1, %al
322 ; SSE2-NEXT: sete %al
325 ; AVX-LABEL: allones_v8i16_sign:
327 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
328 ; AVX-NEXT: vpmovmskb %xmm0, %eax
329 ; AVX-NEXT: cmpb $-1, %al
333 ; KNL-LABEL: allones_v8i16_sign:
335 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
336 ; KNL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
337 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
338 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
339 ; KNL-NEXT: kmovw %k0, %eax
340 ; KNL-NEXT: cmpb $-1, %al
342 ; KNL-NEXT: vzeroupper
345 ; SKX-LABEL: allones_v8i16_sign:
347 ; SKX-NEXT: vpmovw2m %xmm0, %k0
348 ; SKX-NEXT: kortestb %k0, %k0
351 %tmp = icmp slt <8 x i16> %arg, zeroinitializer
352 %tmp1 = bitcast <8 x i1> %tmp to i8
353 %tmp2 = icmp eq i8 %tmp1, -1
; No-lane-negative test, <8 x i16> (i8 mask == 0).
; Same mask extraction as allones_v8i16_sign, zero-checked with testb
; (SSE2/AVX/KNL) or kortestb (SKX).
357 define i1 @allzeros_v8i16_sign(<8 x i16> %arg) {
358 ; SSE2-LABEL: allzeros_v8i16_sign:
360 ; SSE2-NEXT: packsswb %xmm0, %xmm0
361 ; SSE2-NEXT: pmovmskb %xmm0, %eax
362 ; SSE2-NEXT: testb %al, %al
363 ; SSE2-NEXT: sete %al
366 ; AVX-LABEL: allzeros_v8i16_sign:
368 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
369 ; AVX-NEXT: vpmovmskb %xmm0, %eax
370 ; AVX-NEXT: testb %al, %al
374 ; KNL-LABEL: allzeros_v8i16_sign:
376 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
377 ; KNL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
378 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
379 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
380 ; KNL-NEXT: kmovw %k0, %eax
381 ; KNL-NEXT: testb %al, %al
383 ; KNL-NEXT: vzeroupper
386 ; SKX-LABEL: allzeros_v8i16_sign:
388 ; SKX-NEXT: vpmovw2m %xmm0, %k0
389 ; SKX-NEXT: kortestb %k0, %k0
392 %tmp = icmp slt <8 x i16> %arg, zeroinitializer
393 %tmp1 = bitcast <8 x i1> %tmp to i8
394 %tmp2 = icmp eq i8 %tmp1, 0
; All-lanes-negative test, <16 x i16> (i16 mask == -1).
; SSE2/AVX1 pack the two word halves to bytes before pmovmskb; AVX2 compares
; then packs; KNL widens to dword lanes and uses vptestmd + kortestw;
; SKX uses vpmovw2m + kortestw.
398 define i1 @allones_v16i16_sign(<16 x i16> %arg) {
399 ; SSE2-LABEL: allones_v16i16_sign:
401 ; SSE2-NEXT: packsswb %xmm1, %xmm0
402 ; SSE2-NEXT: pmovmskb %xmm0, %eax
403 ; SSE2-NEXT: cmpw $-1, %ax
404 ; SSE2-NEXT: sete %al
407 ; AVX1-LABEL: allones_v16i16_sign:
409 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
410 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
411 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
412 ; AVX1-NEXT: cmpw $-1, %ax
413 ; AVX1-NEXT: sete %al
414 ; AVX1-NEXT: vzeroupper
417 ; AVX2-LABEL: allones_v16i16_sign:
419 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
420 ; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
421 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
422 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
423 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
424 ; AVX2-NEXT: cmpw $-1, %ax
425 ; AVX2-NEXT: sete %al
426 ; AVX2-NEXT: vzeroupper
429 ; KNL-LABEL: allones_v16i16_sign:
431 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
432 ; KNL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
433 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
434 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
435 ; KNL-NEXT: kortestw %k0, %k0
437 ; KNL-NEXT: vzeroupper
440 ; SKX-LABEL: allones_v16i16_sign:
442 ; SKX-NEXT: vpmovw2m %ymm0, %k0
443 ; SKX-NEXT: kortestw %k0, %k0
445 ; SKX-NEXT: vzeroupper
447 %tmp = icmp slt <16 x i16> %arg, zeroinitializer
448 %tmp1 = bitcast <16 x i1> %tmp to i16
449 %tmp2 = icmp eq i16 %tmp1, -1
; No-lane-negative test, <16 x i16> (i16 mask == 0).
; Same lowering shape as allones_v16i16_sign with the compare replaced by
; a zero test (testw / kortestw).
453 define i1 @allzeros_v16i16_sign(<16 x i16> %arg) {
454 ; SSE2-LABEL: allzeros_v16i16_sign:
456 ; SSE2-NEXT: packsswb %xmm1, %xmm0
457 ; SSE2-NEXT: pmovmskb %xmm0, %eax
458 ; SSE2-NEXT: testw %ax, %ax
459 ; SSE2-NEXT: sete %al
462 ; AVX1-LABEL: allzeros_v16i16_sign:
464 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
465 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
466 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
467 ; AVX1-NEXT: testw %ax, %ax
468 ; AVX1-NEXT: sete %al
469 ; AVX1-NEXT: vzeroupper
472 ; AVX2-LABEL: allzeros_v16i16_sign:
474 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
475 ; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
476 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
477 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
478 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
479 ; AVX2-NEXT: testw %ax, %ax
480 ; AVX2-NEXT: sete %al
481 ; AVX2-NEXT: vzeroupper
484 ; KNL-LABEL: allzeros_v16i16_sign:
486 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
487 ; KNL-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
488 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
489 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
490 ; KNL-NEXT: kortestw %k0, %k0
492 ; KNL-NEXT: vzeroupper
495 ; SKX-LABEL: allzeros_v16i16_sign:
497 ; SKX-NEXT: vpmovw2m %ymm0, %k0
498 ; SKX-NEXT: kortestw %k0, %k0
500 ; SKX-NEXT: vzeroupper
502 %tmp = icmp slt <16 x i16> %arg, zeroinitializer
503 %tmp1 = bitcast <16 x i1> %tmp to i16
504 %tmp2 = icmp eq i16 %tmp1, 0
; All-lanes-negative test, <32 x i16> (i32 mask == -1).
; SSE2/AVX1 pack halves and stitch two 16-bit masks; AVX2 packs both ymm
; halves (vpermq fixes the lane interleave from the in-lane pack) for one
; 32-bit vpmovmskb; KNL splits into two 16-lane dword tests and stitches;
; SKX handles the whole zmm with vpmovw2m + kortestd.
508 define i1 @allones_v32i16_sign(<32 x i16> %arg) {
509 ; SSE2-LABEL: allones_v32i16_sign:
511 ; SSE2-NEXT: packsswb %xmm1, %xmm0
512 ; SSE2-NEXT: pmovmskb %xmm0, %eax
513 ; SSE2-NEXT: packsswb %xmm3, %xmm2
514 ; SSE2-NEXT: pmovmskb %xmm2, %ecx
515 ; SSE2-NEXT: shll $16, %ecx
516 ; SSE2-NEXT: orl %eax, %ecx
517 ; SSE2-NEXT: cmpl $-1, %ecx
518 ; SSE2-NEXT: sete %al
521 ; AVX1-LABEL: allones_v32i16_sign:
523 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
524 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
525 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
526 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
527 ; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
528 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
529 ; AVX1-NEXT: shll $16, %ecx
530 ; AVX1-NEXT: orl %eax, %ecx
531 ; AVX1-NEXT: cmpl $-1, %ecx
532 ; AVX1-NEXT: sete %al
533 ; AVX1-NEXT: vzeroupper
536 ; AVX2-LABEL: allones_v32i16_sign:
538 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
539 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
540 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
541 ; AVX2-NEXT: cmpl $-1, %eax
542 ; AVX2-NEXT: sete %al
543 ; AVX2-NEXT: vzeroupper
546 ; KNL-LABEL: allones_v32i16_sign:
548 ; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
549 ; KNL-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
550 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
551 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
552 ; KNL-NEXT: kmovw %k0, %eax
553 ; KNL-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm0
554 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
555 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
556 ; KNL-NEXT: kmovw %k0, %ecx
557 ; KNL-NEXT: shll $16, %ecx
558 ; KNL-NEXT: orl %eax, %ecx
559 ; KNL-NEXT: cmpl $-1, %ecx
561 ; KNL-NEXT: vzeroupper
564 ; SKX-LABEL: allones_v32i16_sign:
566 ; SKX-NEXT: vpmovw2m %zmm0, %k0
567 ; SKX-NEXT: kortestd %k0, %k0
569 ; SKX-NEXT: vzeroupper
571 %tmp = icmp slt <32 x i16> %arg, zeroinitializer
572 %tmp1 = bitcast <32 x i1> %tmp to i32
573 %tmp2 = icmp eq i32 %tmp1, -1
; No-lane-negative test, <32 x i16> (i32 mask == 0).
; Mirrors allones_v32i16_sign with zero checks (final orl/testl sets ZF);
; SKX: vpmovw2m + kortestd.
577 define i1 @allzeros_v32i16_sign(<32 x i16> %arg) {
578 ; SSE2-LABEL: allzeros_v32i16_sign:
580 ; SSE2-NEXT: packsswb %xmm1, %xmm0
581 ; SSE2-NEXT: pmovmskb %xmm0, %eax
582 ; SSE2-NEXT: packsswb %xmm3, %xmm2
583 ; SSE2-NEXT: pmovmskb %xmm2, %ecx
584 ; SSE2-NEXT: shll $16, %ecx
585 ; SSE2-NEXT: orl %eax, %ecx
586 ; SSE2-NEXT: sete %al
589 ; AVX1-LABEL: allzeros_v32i16_sign:
591 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
592 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
593 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
594 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
595 ; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
596 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
597 ; AVX1-NEXT: shll $16, %ecx
598 ; AVX1-NEXT: orl %eax, %ecx
599 ; AVX1-NEXT: sete %al
600 ; AVX1-NEXT: vzeroupper
603 ; AVX2-LABEL: allzeros_v32i16_sign:
605 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
606 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
607 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
608 ; AVX2-NEXT: testl %eax, %eax
609 ; AVX2-NEXT: sete %al
610 ; AVX2-NEXT: vzeroupper
613 ; KNL-LABEL: allzeros_v32i16_sign:
615 ; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
616 ; KNL-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
617 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
618 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
619 ; KNL-NEXT: kmovw %k0, %eax
620 ; KNL-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm0
621 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
622 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
623 ; KNL-NEXT: kmovw %k0, %ecx
624 ; KNL-NEXT: shll $16, %ecx
625 ; KNL-NEXT: orl %eax, %ecx
627 ; KNL-NEXT: vzeroupper
630 ; SKX-LABEL: allzeros_v32i16_sign:
632 ; SKX-NEXT: vpmovw2m %zmm0, %k0
633 ; SKX-NEXT: kortestd %k0, %k0
635 ; SKX-NEXT: vzeroupper
637 %tmp = icmp slt <32 x i16> %arg, zeroinitializer
638 %tmp1 = bitcast <32 x i1> %tmp to i32
639 %tmp2 = icmp eq i32 %tmp1, 0
; All-lanes-negative test, <4 x i32> (i4 mask == -1, i.e. low 4 bits all set).
; SSE2/AVX use movmskps and compare against 15; the AVX512 paths extract the
; k-register to a GPR and mask to the low 4 bits (andb $15) before comparing.
643 define i1 @allones_v4i32_sign(<4 x i32> %arg) {
644 ; SSE2-LABEL: allones_v4i32_sign:
646 ; SSE2-NEXT: movmskps %xmm0, %eax
647 ; SSE2-NEXT: cmpb $15, %al
648 ; SSE2-NEXT: sete %al
651 ; AVX-LABEL: allones_v4i32_sign:
653 ; AVX-NEXT: vmovmskps %xmm0, %eax
654 ; AVX-NEXT: cmpb $15, %al
658 ; KNL-LABEL: allones_v4i32_sign:
660 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
661 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
662 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
663 ; KNL-NEXT: kmovw %k0, %eax
664 ; KNL-NEXT: andb $15, %al
665 ; KNL-NEXT: cmpb $15, %al
667 ; KNL-NEXT: vzeroupper
670 ; SKX-LABEL: allones_v4i32_sign:
672 ; SKX-NEXT: vpmovd2m %xmm0, %k0
673 ; SKX-NEXT: kmovd %k0, %eax
674 ; SKX-NEXT: andb $15, %al
675 ; SKX-NEXT: cmpb $15, %al
678 %tmp = icmp slt <4 x i32> %arg, zeroinitializer
679 %tmp1 = bitcast <4 x i1> %tmp to i4
680 %tmp2 = icmp eq i4 %tmp1, -1
; No-lane-negative test, <4 x i32> (i4 mask == 0).
; SSE2/AVX: movmskps + testb; AVX512 paths test only the low 4 mask bits
; (testb $15).
684 define i1 @allzeros_v4i32_sign(<4 x i32> %arg) {
685 ; SSE2-LABEL: allzeros_v4i32_sign:
687 ; SSE2-NEXT: movmskps %xmm0, %eax
688 ; SSE2-NEXT: testb %al, %al
689 ; SSE2-NEXT: sete %al
692 ; AVX-LABEL: allzeros_v4i32_sign:
694 ; AVX-NEXT: vmovmskps %xmm0, %eax
695 ; AVX-NEXT: testb %al, %al
699 ; KNL-LABEL: allzeros_v4i32_sign:
701 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
702 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
703 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
704 ; KNL-NEXT: kmovw %k0, %eax
705 ; KNL-NEXT: testb $15, %al
707 ; KNL-NEXT: vzeroupper
710 ; SKX-LABEL: allzeros_v4i32_sign:
712 ; SKX-NEXT: vpmovd2m %xmm0, %k0
713 ; SKX-NEXT: kmovd %k0, %eax
714 ; SKX-NEXT: testb $15, %al
717 %tmp = icmp slt <4 x i32> %arg, zeroinitializer
718 %tmp1 = bitcast <4 x i1> %tmp to i4
719 %tmp2 = icmp eq i4 %tmp1, 0
; All-lanes-negative test, <8 x i32> (i8 mask == -1).
; SSE2 packs dwords down to bytes for pmovmskb; AVX uses a single ymm
; vmovmskps; KNL widens to zmm and compares the 8-bit mask; SKX uses
; vpmovd2m + kortestb.
723 define i1 @allones_v8i32_sign(<8 x i32> %arg) {
724 ; SSE2-LABEL: allones_v8i32_sign:
726 ; SSE2-NEXT: packssdw %xmm1, %xmm0
727 ; SSE2-NEXT: packsswb %xmm0, %xmm0
728 ; SSE2-NEXT: pmovmskb %xmm0, %eax
729 ; SSE2-NEXT: cmpb $-1, %al
730 ; SSE2-NEXT: sete %al
733 ; AVX-LABEL: allones_v8i32_sign:
735 ; AVX-NEXT: vmovmskps %ymm0, %eax
736 ; AVX-NEXT: cmpb $-1, %al
738 ; AVX-NEXT: vzeroupper
741 ; KNL-LABEL: allones_v8i32_sign:
743 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
744 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
745 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
746 ; KNL-NEXT: kmovw %k0, %eax
747 ; KNL-NEXT: cmpb $-1, %al
749 ; KNL-NEXT: vzeroupper
752 ; SKX-LABEL: allones_v8i32_sign:
754 ; SKX-NEXT: vpmovd2m %ymm0, %k0
755 ; SKX-NEXT: kortestb %k0, %k0
757 ; SKX-NEXT: vzeroupper
759 %tmp = icmp slt <8 x i32> %arg, zeroinitializer
760 %tmp1 = bitcast <8 x i1> %tmp to i8
761 %tmp2 = icmp eq i8 %tmp1, -1
; No-lane-negative test, <8 x i32> (i8 mask == 0).
; Same extraction as allones_v8i32_sign with testb / kortestb zero checks.
765 define i1 @allzeros_v8i32_sign(<8 x i32> %arg) {
766 ; SSE2-LABEL: allzeros_v8i32_sign:
768 ; SSE2-NEXT: packssdw %xmm1, %xmm0
769 ; SSE2-NEXT: packsswb %xmm0, %xmm0
770 ; SSE2-NEXT: pmovmskb %xmm0, %eax
771 ; SSE2-NEXT: testb %al, %al
772 ; SSE2-NEXT: sete %al
775 ; AVX-LABEL: allzeros_v8i32_sign:
777 ; AVX-NEXT: vmovmskps %ymm0, %eax
778 ; AVX-NEXT: testb %al, %al
780 ; AVX-NEXT: vzeroupper
783 ; KNL-LABEL: allzeros_v8i32_sign:
785 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
786 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
787 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
788 ; KNL-NEXT: kmovw %k0, %eax
789 ; KNL-NEXT: testb %al, %al
791 ; KNL-NEXT: vzeroupper
794 ; SKX-LABEL: allzeros_v8i32_sign:
796 ; SKX-NEXT: vpmovd2m %ymm0, %k0
797 ; SKX-NEXT: kortestb %k0, %k0
799 ; SKX-NEXT: vzeroupper
801 %tmp = icmp slt <8 x i32> %arg, zeroinitializer
802 %tmp1 = bitcast <8 x i1> %tmp to i8
803 %tmp2 = icmp eq i8 %tmp1, 0
; All-lanes-negative test, <16 x i32> (i16 mask == -1).
; SSE2/AVX1 pack four dword quarters down to bytes for one pmovmskb;
; AVX2 compares then packs (vpermq repairs the in-lane pack order);
; both AVX512 targets use a native 16-lane dword mask with kortestw.
807 define i1 @allones_v16i32_sign(<16 x i32> %arg) {
808 ; SSE2-LABEL: allones_v16i32_sign:
810 ; SSE2-NEXT: packssdw %xmm3, %xmm2
811 ; SSE2-NEXT: packssdw %xmm1, %xmm0
812 ; SSE2-NEXT: packsswb %xmm2, %xmm0
813 ; SSE2-NEXT: pmovmskb %xmm0, %eax
814 ; SSE2-NEXT: cmpw $-1, %ax
815 ; SSE2-NEXT: sete %al
818 ; AVX1-LABEL: allones_v16i32_sign:
820 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
821 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
822 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
823 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
824 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
825 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
826 ; AVX1-NEXT: cmpw $-1, %ax
827 ; AVX1-NEXT: sete %al
828 ; AVX1-NEXT: vzeroupper
831 ; AVX2-LABEL: allones_v16i32_sign:
833 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
834 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm2, %ymm1
835 ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0
836 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
837 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
838 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
839 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
840 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
841 ; AVX2-NEXT: cmpw $-1, %ax
842 ; AVX2-NEXT: sete %al
843 ; AVX2-NEXT: vzeroupper
846 ; KNL-LABEL: allones_v16i32_sign:
848 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
849 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
850 ; KNL-NEXT: kortestw %k0, %k0
852 ; KNL-NEXT: vzeroupper
855 ; SKX-LABEL: allones_v16i32_sign:
857 ; SKX-NEXT: vpmovd2m %zmm0, %k0
858 ; SKX-NEXT: kortestw %k0, %k0
860 ; SKX-NEXT: vzeroupper
862 %tmp = icmp slt <16 x i32> %arg, zeroinitializer
863 %tmp1 = bitcast <16 x i1> %tmp to i16
864 %tmp2 = icmp eq i16 %tmp1, -1
; No-lane-negative test, <16 x i32> (i16 mask == 0).
; Mirrors allones_v16i32_sign with testw / kortestw zero checks.
868 define i1 @allzeros_v16i32_sign(<16 x i32> %arg) {
869 ; SSE2-LABEL: allzeros_v16i32_sign:
871 ; SSE2-NEXT: packssdw %xmm3, %xmm2
872 ; SSE2-NEXT: packssdw %xmm1, %xmm0
873 ; SSE2-NEXT: packsswb %xmm2, %xmm0
874 ; SSE2-NEXT: pmovmskb %xmm0, %eax
875 ; SSE2-NEXT: testw %ax, %ax
876 ; SSE2-NEXT: sete %al
879 ; AVX1-LABEL: allzeros_v16i32_sign:
881 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
882 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
883 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
884 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
885 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
886 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
887 ; AVX1-NEXT: testw %ax, %ax
888 ; AVX1-NEXT: sete %al
889 ; AVX1-NEXT: vzeroupper
892 ; AVX2-LABEL: allzeros_v16i32_sign:
894 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
895 ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm2, %ymm1
896 ; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0
897 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
898 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
899 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
900 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
901 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
902 ; AVX2-NEXT: testw %ax, %ax
903 ; AVX2-NEXT: sete %al
904 ; AVX2-NEXT: vzeroupper
907 ; KNL-LABEL: allzeros_v16i32_sign:
909 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
910 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
911 ; KNL-NEXT: kortestw %k0, %k0
913 ; KNL-NEXT: vzeroupper
916 ; SKX-LABEL: allzeros_v16i32_sign:
918 ; SKX-NEXT: vpmovd2m %zmm0, %k0
919 ; SKX-NEXT: kortestw %k0, %k0
921 ; SKX-NEXT: vzeroupper
923 %tmp = icmp slt <16 x i32> %arg, zeroinitializer
924 %tmp1 = bitcast <16 x i1> %tmp to i16
925 %tmp2 = icmp eq i16 %tmp1, 0
; All-lanes-negative test, <4 x i64> (i4 mask == -1, low 4 bits set).
; SSE2 packs qword sign info to dwords and uses movmskps; AVX uses
; vmovmskpd on the ymm; AVX512 paths mask the k-register to 4 bits
; (andb $15) before the compare.
929 define i1 @allones_v4i64_sign(<4 x i64> %arg) {
930 ; SSE2-LABEL: allones_v4i64_sign:
932 ; SSE2-NEXT: packssdw %xmm1, %xmm0
933 ; SSE2-NEXT: movmskps %xmm0, %eax
934 ; SSE2-NEXT: cmpb $15, %al
935 ; SSE2-NEXT: sete %al
938 ; AVX-LABEL: allones_v4i64_sign:
940 ; AVX-NEXT: vmovmskpd %ymm0, %eax
941 ; AVX-NEXT: cmpb $15, %al
943 ; AVX-NEXT: vzeroupper
946 ; KNL-LABEL: allones_v4i64_sign:
948 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
949 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
950 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
951 ; KNL-NEXT: kmovw %k0, %eax
952 ; KNL-NEXT: andb $15, %al
953 ; KNL-NEXT: cmpb $15, %al
955 ; KNL-NEXT: vzeroupper
958 ; SKX-LABEL: allones_v4i64_sign:
960 ; SKX-NEXT: vpmovq2m %ymm0, %k0
961 ; SKX-NEXT: kmovd %k0, %eax
962 ; SKX-NEXT: andb $15, %al
963 ; SKX-NEXT: cmpb $15, %al
965 ; SKX-NEXT: vzeroupper
967 %tmp = icmp slt <4 x i64> %arg, zeroinitializer
968 %tmp1 = bitcast <4 x i1> %tmp to i4
969 %tmp2 = icmp eq i4 %tmp1, -1
; No-lane-negative test, <4 x i64> (i4 mask == 0).
; Same extraction as allones_v4i64_sign; AVX512 paths test only the low
; 4 mask bits (testb $15).
973 define i1 @allzeros_v4i64_sign(<4 x i64> %arg) {
974 ; SSE2-LABEL: allzeros_v4i64_sign:
976 ; SSE2-NEXT: packssdw %xmm1, %xmm0
977 ; SSE2-NEXT: movmskps %xmm0, %eax
978 ; SSE2-NEXT: testb %al, %al
979 ; SSE2-NEXT: sete %al
982 ; AVX-LABEL: allzeros_v4i64_sign:
984 ; AVX-NEXT: vmovmskpd %ymm0, %eax
985 ; AVX-NEXT: testb %al, %al
987 ; AVX-NEXT: vzeroupper
990 ; KNL-LABEL: allzeros_v4i64_sign:
992 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
993 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
994 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
995 ; KNL-NEXT: kmovw %k0, %eax
996 ; KNL-NEXT: testb $15, %al
998 ; KNL-NEXT: vzeroupper
1001 ; SKX-LABEL: allzeros_v4i64_sign:
1003 ; SKX-NEXT: vpmovq2m %ymm0, %k0
1004 ; SKX-NEXT: kmovd %k0, %eax
1005 ; SKX-NEXT: testb $15, %al
1006 ; SKX-NEXT: sete %al
1007 ; SKX-NEXT: vzeroupper
1009 %tmp = icmp slt <4 x i64> %arg, zeroinitializer
1010 %tmp1 = bitcast <4 x i1> %tmp to i4
1011 %tmp2 = icmp eq i4 %tmp1, 0
; All-lanes-negative test, <8 x i64> (i8 mask == -1).
; SSE2 has no 64-bit signed compare, so each qword half goes through the
; classic pcmpgtq emulation (pxor with the 0x80000000 bias constant, then
; pcmpgtd/pcmpeqd + shuffles) before packing down for pmovmskb.
; AVX1/AVX2 use vpcmpgtq then pack; KNL/SKX compare straight into a mask
; register (SKX: vpmovq2m + kortestb, checking CF via setb).
1015 define i1 @allones_v8i64_sign(<8 x i64> %arg) {
1016 ; SSE2-LABEL: allones_v8i64_sign:
1018 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
1019 ; SSE2-NEXT: pxor %xmm4, %xmm3
1020 ; SSE2-NEXT: movdqa %xmm4, %xmm5
1021 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
1022 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
1023 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
1024 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1025 ; SSE2-NEXT: pand %xmm6, %xmm3
1026 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
1027 ; SSE2-NEXT: por %xmm3, %xmm5
1028 ; SSE2-NEXT: pxor %xmm4, %xmm2
1029 ; SSE2-NEXT: movdqa %xmm4, %xmm3
1030 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
1031 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
1032 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
1033 ; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
1034 ; SSE2-NEXT: pand %xmm6, %xmm7
1035 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
1036 ; SSE2-NEXT: por %xmm7, %xmm2
1037 ; SSE2-NEXT: packssdw %xmm5, %xmm2
1038 ; SSE2-NEXT: pxor %xmm4, %xmm1
1039 ; SSE2-NEXT: movdqa %xmm4, %xmm3
1040 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
1041 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
1042 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
1043 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1044 ; SSE2-NEXT: pand %xmm5, %xmm1
1045 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1046 ; SSE2-NEXT: por %xmm1, %xmm3
1047 ; SSE2-NEXT: pxor %xmm4, %xmm0
1048 ; SSE2-NEXT: movdqa %xmm4, %xmm1
1049 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
1050 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
1051 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
1052 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1053 ; SSE2-NEXT: pand %xmm5, %xmm0
1054 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1055 ; SSE2-NEXT: por %xmm0, %xmm1
1056 ; SSE2-NEXT: packssdw %xmm3, %xmm1
1057 ; SSE2-NEXT: packssdw %xmm2, %xmm1
1058 ; SSE2-NEXT: packsswb %xmm0, %xmm1
1059 ; SSE2-NEXT: pmovmskb %xmm1, %eax
1060 ; SSE2-NEXT: cmpb $-1, %al
1061 ; SSE2-NEXT: sete %al
1064 ; AVX1-LABEL: allones_v8i64_sign:
1066 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1067 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1068 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1069 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
1070 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
1071 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1072 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
1073 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1074 ; AVX1-NEXT: vmovmskps %ymm0, %eax
1075 ; AVX1-NEXT: cmpb $-1, %al
1076 ; AVX1-NEXT: sete %al
1077 ; AVX1-NEXT: vzeroupper
1080 ; AVX2-LABEL: allones_v8i64_sign:
1082 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1083 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm1
1084 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0
1085 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
1086 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1087 ; AVX2-NEXT: vmovmskps %ymm0, %eax
1088 ; AVX2-NEXT: cmpb $-1, %al
1089 ; AVX2-NEXT: sete %al
1090 ; AVX2-NEXT: vzeroupper
1093 ; KNL-LABEL: allones_v8i64_sign:
1095 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1096 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
1097 ; KNL-NEXT: kmovw %k0, %eax
1098 ; KNL-NEXT: cmpb $-1, %al
1099 ; KNL-NEXT: sete %al
1100 ; KNL-NEXT: vzeroupper
1103 ; SKX-LABEL: allones_v8i64_sign:
1105 ; SKX-NEXT: vpmovq2m %zmm0, %k0
1106 ; SKX-NEXT: kortestb %k0, %k0
1107 ; SKX-NEXT: setb %al
1108 ; SKX-NEXT: vzeroupper
1110 %tmp = icmp slt <8 x i64> %arg, zeroinitializer
1111 %tmp1 = bitcast <8 x i1> %tmp to i8
1112 %tmp2 = icmp eq i8 %tmp1, -1
; No-lane-negative test, <8 x i64> (i8 mask == 0).
; Same per-target mask construction as allones_v8i64_sign (including the
; SSE2 pcmpgtq emulation via the 0x80000000 bias constant), but the final
; check is a zero test (testb / kortestb + sete).
1116 define i1 @allzeros_v8i64_sign(<8 x i64> %arg) {
1117 ; SSE2-LABEL: allzeros_v8i64_sign:
1119 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
1120 ; SSE2-NEXT: pxor %xmm4, %xmm3
1121 ; SSE2-NEXT: movdqa %xmm4, %xmm5
1122 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
1123 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
1124 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
1125 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1126 ; SSE2-NEXT: pand %xmm6, %xmm3
1127 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
1128 ; SSE2-NEXT: por %xmm3, %xmm5
1129 ; SSE2-NEXT: pxor %xmm4, %xmm2
1130 ; SSE2-NEXT: movdqa %xmm4, %xmm3
1131 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
1132 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
1133 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
1134 ; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
1135 ; SSE2-NEXT: pand %xmm6, %xmm7
1136 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
1137 ; SSE2-NEXT: por %xmm7, %xmm2
1138 ; SSE2-NEXT: packssdw %xmm5, %xmm2
1139 ; SSE2-NEXT: pxor %xmm4, %xmm1
1140 ; SSE2-NEXT: movdqa %xmm4, %xmm3
1141 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
1142 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
1143 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
1144 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1145 ; SSE2-NEXT: pand %xmm5, %xmm1
1146 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1147 ; SSE2-NEXT: por %xmm1, %xmm3
1148 ; SSE2-NEXT: pxor %xmm4, %xmm0
1149 ; SSE2-NEXT: movdqa %xmm4, %xmm1
1150 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
1151 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
1152 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
1153 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1154 ; SSE2-NEXT: pand %xmm5, %xmm0
1155 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1156 ; SSE2-NEXT: por %xmm0, %xmm1
1157 ; SSE2-NEXT: packssdw %xmm3, %xmm1
1158 ; SSE2-NEXT: packssdw %xmm2, %xmm1
1159 ; SSE2-NEXT: packsswb %xmm0, %xmm1
1160 ; SSE2-NEXT: pmovmskb %xmm1, %eax
1161 ; SSE2-NEXT: testb %al, %al
1162 ; SSE2-NEXT: sete %al
1165 ; AVX1-LABEL: allzeros_v8i64_sign:
1167 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1168 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1169 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1170 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
1171 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
1172 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1173 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
1174 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1175 ; AVX1-NEXT: vmovmskps %ymm0, %eax
1176 ; AVX1-NEXT: testb %al, %al
1177 ; AVX1-NEXT: sete %al
1178 ; AVX1-NEXT: vzeroupper
1181 ; AVX2-LABEL: allzeros_v8i64_sign:
1183 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1184 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm1
1185 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0
1186 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
1187 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
1188 ; AVX2-NEXT: vmovmskps %ymm0, %eax
1189 ; AVX2-NEXT: testb %al, %al
1190 ; AVX2-NEXT: sete %al
1191 ; AVX2-NEXT: vzeroupper
1194 ; KNL-LABEL: allzeros_v8i64_sign:
1196 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1197 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
1198 ; KNL-NEXT: kmovw %k0, %eax
1199 ; KNL-NEXT: testb %al, %al
1200 ; KNL-NEXT: sete %al
1201 ; KNL-NEXT: vzeroupper
1204 ; SKX-LABEL: allzeros_v8i64_sign:
1206 ; SKX-NEXT: vpmovq2m %zmm0, %k0
1207 ; SKX-NEXT: kortestb %k0, %k0
1208 ; SKX-NEXT: sete %al
1209 ; SKX-NEXT: vzeroupper
1211 %tmp = icmp slt <8 x i64> %arg, zeroinitializer
1212 %tmp1 = bitcast <8 x i1> %tmp to i8
1213 %tmp2 = icmp eq i8 %tmp1, 0
; All-lanes bit-0-set test, <16 x i8>: (%arg & 1) != 0 per lane, mask == -1.
; Legacy targets shift bit 0 into the sign position (psllw $7) and reuse the
; pmovmskb path; SKX tests the bit directly with vptestmb against a constant
; pool vector, then kortestw + setb.
1217 define i1 @allones_v16i8_and1(<16 x i8> %arg) {
1218 ; SSE2-LABEL: allones_v16i8_and1:
1220 ; SSE2-NEXT: psllw $7, %xmm0
1221 ; SSE2-NEXT: pmovmskb %xmm0, %eax
1222 ; SSE2-NEXT: cmpw $-1, %ax
1223 ; SSE2-NEXT: sete %al
1226 ; AVX-LABEL: allones_v16i8_and1:
1228 ; AVX-NEXT: vpsllw $7, %xmm0, %xmm0
1229 ; AVX-NEXT: vpmovmskb %xmm0, %eax
1230 ; AVX-NEXT: cmpw $-1, %ax
1231 ; AVX-NEXT: sete %al
1234 ; KNL-LABEL: allones_v16i8_and1:
1236 ; KNL-NEXT: vpsllw $7, %xmm0, %xmm0
1237 ; KNL-NEXT: vpmovmskb %xmm0, %eax
1238 ; KNL-NEXT: cmpw $-1, %ax
1239 ; KNL-NEXT: sete %al
1242 ; SKX-LABEL: allones_v16i8_and1:
1244 ; SKX-NEXT: vptestmb {{.*}}(%rip), %xmm0, %k0
1245 ; SKX-NEXT: kortestw %k0, %k0
1246 ; SKX-NEXT: setb %al
1248 %tmp = and <16 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1249 %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
1250 %tmp2 = bitcast <16 x i1> %tmp1 to i16
1251 %tmp3 = icmp eq i16 %tmp2, -1
; Test bit 0 of each i8 lane; true iff no lane has it set (mask compares equal to zero).
define i1 @allzeros_v16i8_and1(<16 x i8> %arg) {
; SSE2-LABEL: allzeros_v16i8_and1:
; SSE2-NEXT: psllw $7, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: testw %ax, %ax
; SSE2-NEXT: sete %al
; AVX-LABEL: allzeros_v16i8_and1:
; AVX-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX-NEXT: vpmovmskb %xmm0, %eax
; AVX-NEXT: testw %ax, %ax
; AVX-NEXT: sete %al
; KNL-LABEL: allzeros_v16i8_and1:
; KNL-NEXT: vpsllw $7, %xmm0, %xmm0
; KNL-NEXT: vpmovmskb %xmm0, %eax
; KNL-NEXT: testw %ax, %ax
; KNL-NEXT: sete %al
; SKX-LABEL: allzeros_v16i8_and1:
; SKX-NEXT: vptestmb {{.*}}(%rip), %xmm0, %k0
; SKX-NEXT: kortestw %k0, %k0
; SKX-NEXT: sete %al
%tmp = and <16 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
%tmp3 = icmp eq i16 %tmp2, 0
; 256-bit variant: bit 0 of each of 32 i8 lanes must be set. Pre-AVX2 targets
; combine two 16-bit pmovmskb results via shift+or before the all-ones compare.
define i1 @allones_v32i8_and1(<32 x i8> %arg) {
; SSE2-LABEL: allones_v32i8_and1:
; SSE2-NEXT: psllw $7, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: psllw $7, %xmm1
; SSE2-NEXT: pmovmskb %xmm1, %ecx
; SSE2-NEXT: shll $16, %ecx
; SSE2-NEXT: orl %eax, %ecx
; SSE2-NEXT: cmpl $-1, %ecx
; SSE2-NEXT: sete %al
; AVX1-LABEL: allones_v32i8_and1:
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm1
; AVX1-NEXT: vpmovmskb %xmm1, %eax
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: shll $16, %ecx
; AVX1-NEXT: orl %eax, %ecx
; AVX1-NEXT: cmpl $-1, %ecx
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX2-LABEL: allones_v32i8_and1:
; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: cmpl $-1, %eax
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; KNL-LABEL: allones_v32i8_and1:
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
; KNL-NEXT: vpmovmskb %ymm0, %eax
; KNL-NEXT: cmpl $-1, %eax
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; SKX-LABEL: allones_v32i8_and1:
; SKX-NEXT: vptestmb {{.*}}(%rip), %ymm0, %k0
; SKX-NEXT: kortestd %k0, %k0
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
%tmp = and <32 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <32 x i1> %tmp1 to i32
%tmp3 = icmp eq i32 %tmp2, -1
; 256-bit variant: true iff bit 0 is clear in every one of 32 i8 lanes
; (combined 32-bit movemask tested against zero).
define i1 @allzeros_v32i8_and1(<32 x i8> %arg) {
; SSE2-LABEL: allzeros_v32i8_and1:
; SSE2-NEXT: psllw $7, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: psllw $7, %xmm1
; SSE2-NEXT: pmovmskb %xmm1, %ecx
; SSE2-NEXT: shll $16, %ecx
; SSE2-NEXT: orl %eax, %ecx
; SSE2-NEXT: sete %al
; AVX1-LABEL: allzeros_v32i8_and1:
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm1
; AVX1-NEXT: vpmovmskb %xmm1, %eax
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: shll $16, %ecx
; AVX1-NEXT: orl %eax, %ecx
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX2-LABEL: allzeros_v32i8_and1:
; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; KNL-LABEL: allzeros_v32i8_and1:
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
; KNL-NEXT: vpmovmskb %ymm0, %eax
; KNL-NEXT: testl %eax, %eax
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; SKX-LABEL: allzeros_v32i8_and1:
; SKX-NEXT: vptestmb {{.*}}(%rip), %ymm0, %k0
; SKX-NEXT: kortestd %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
%tmp = and <32 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <32 x i1> %tmp1 to i32
%tmp3 = icmp eq i32 %tmp2, 0
; 512-bit variant: bit 0 of each of 64 i8 lanes must be set. Non-AVX512BW
; targets assemble a 64-bit mask from four (or two) movemasks before the
; all-ones compare; AVX512BW uses vptestmb on a zmm register with kortestq.
define i1 @allones_v64i8_and1(<64 x i8> %arg) {
; SSE2-LABEL: allones_v64i8_and1:
; SSE2-NEXT: psllw $7, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: psllw $7, %xmm1
; SSE2-NEXT: pmovmskb %xmm1, %ecx
; SSE2-NEXT: shll $16, %ecx
; SSE2-NEXT: orl %eax, %ecx
; SSE2-NEXT: psllw $7, %xmm2
; SSE2-NEXT: pmovmskb %xmm2, %eax
; SSE2-NEXT: psllw $7, %xmm3
; SSE2-NEXT: pmovmskb %xmm3, %edx
; SSE2-NEXT: shll $16, %edx
; SSE2-NEXT: orl %eax, %edx
; SSE2-NEXT: shlq $32, %rdx
; SSE2-NEXT: orq %rcx, %rdx
; SSE2-NEXT: cmpq $-1, %rdx
; SSE2-NEXT: sete %al
; AVX1-LABEL: allones_v64i8_and1:
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovmskb %xmm2, %eax
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: shll $16, %ecx
; AVX1-NEXT: orl %eax, %ecx
; AVX1-NEXT: vpsllw $7, %xmm1, %xmm0
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %edx
; AVX1-NEXT: shll $16, %edx
; AVX1-NEXT: orl %eax, %edx
; AVX1-NEXT: shlq $32, %rdx
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: cmpq $-1, %rdx
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX2-LABEL: allones_v64i8_and1:
; AVX2-NEXT: vpsllw $7, %ymm1, %ymm1
; AVX2-NEXT: vpmovmskb %ymm1, %eax
; AVX2-NEXT: shlq $32, %rax
; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
; AVX2-NEXT: orq %rax, %rcx
; AVX2-NEXT: cmpq $-1, %rcx
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; KNL-LABEL: allones_v64i8_and1:
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
; KNL-NEXT: vpsllw $7, %ymm1, %ymm1
; KNL-NEXT: vpmovmskb %ymm1, %eax
; KNL-NEXT: shlq $32, %rax
; KNL-NEXT: vpmovmskb %ymm0, %ecx
; KNL-NEXT: orq %rax, %rcx
; KNL-NEXT: cmpq $-1, %rcx
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; SKX-LABEL: allones_v64i8_and1:
; SKX-NEXT: vptestmb {{.*}}(%rip), %zmm0, %k0
; SKX-NEXT: kortestq %k0, %k0
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
%tmp = and <64 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <64 x i1> %tmp1 to i64
%tmp3 = icmp eq i64 %tmp2, -1
; 512-bit variant: true iff bit 0 is clear in all 64 i8 lanes; the combined
; 64-bit mask is ORed together and tested for zero.
define i1 @allzeros_v64i8_and1(<64 x i8> %arg) {
; SSE2-LABEL: allzeros_v64i8_and1:
; SSE2-NEXT: psllw $7, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: psllw $7, %xmm1
; SSE2-NEXT: pmovmskb %xmm1, %ecx
; SSE2-NEXT: shll $16, %ecx
; SSE2-NEXT: orl %eax, %ecx
; SSE2-NEXT: psllw $7, %xmm2
; SSE2-NEXT: pmovmskb %xmm2, %eax
; SSE2-NEXT: psllw $7, %xmm3
; SSE2-NEXT: pmovmskb %xmm3, %edx
; SSE2-NEXT: shll $16, %edx
; SSE2-NEXT: orl %eax, %edx
; SSE2-NEXT: shlq $32, %rdx
; SSE2-NEXT: orq %rcx, %rdx
; SSE2-NEXT: sete %al
; AVX1-LABEL: allzeros_v64i8_and1:
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpmovmskb %xmm2, %eax
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: shll $16, %ecx
; AVX1-NEXT: orl %eax, %ecx
; AVX1-NEXT: vpsllw $7, %xmm1, %xmm0
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %edx
; AVX1-NEXT: shll $16, %edx
; AVX1-NEXT: orl %eax, %edx
; AVX1-NEXT: shlq $32, %rdx
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX2-LABEL: allzeros_v64i8_and1:
; AVX2-NEXT: vpsllw $7, %ymm1, %ymm1
; AVX2-NEXT: vpmovmskb %ymm1, %eax
; AVX2-NEXT: shlq $32, %rax
; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
; AVX2-NEXT: orq %rax, %rcx
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; KNL-LABEL: allzeros_v64i8_and1:
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
; KNL-NEXT: vpsllw $7, %ymm1, %ymm1
; KNL-NEXT: vpmovmskb %ymm1, %eax
; KNL-NEXT: shlq $32, %rax
; KNL-NEXT: vpmovmskb %ymm0, %ecx
; KNL-NEXT: orq %rax, %rcx
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; SKX-LABEL: allzeros_v64i8_and1:
; SKX-NEXT: vptestmb {{.*}}(%rip), %zmm0, %k0
; SKX-NEXT: kortestq %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
%tmp = and <64 x i8> %arg, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
%tmp2 = bitcast <64 x i1> %tmp1 to i64
%tmp3 = icmp eq i64 %tmp2, 0
; i16 element variant: bit 0 of all 8 lanes must be set. Lanes are shifted to
; the sign bit (psllw $15) and narrowed with packsswb before the mask compare.
define i1 @allones_v8i16_and1(<8 x i16> %arg) {
; SSE2-LABEL: allones_v8i16_and1:
; SSE2-NEXT: psllw $15, %xmm0
; SSE2-NEXT: packsswb %xmm0, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: cmpb $-1, %al
; SSE2-NEXT: sete %al
; AVX-LABEL: allones_v8i16_and1:
; AVX-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: vpmovmskb %xmm0, %eax
; AVX-NEXT: cmpb $-1, %al
; AVX-NEXT: sete %al
; KNL-LABEL: allones_v8i16_and1:
; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: cmpb $-1, %al
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; SKX-LABEL: allones_v8i16_and1:
; SKX-NEXT: vptestmw {{.*}}(%rip), %xmm0, %k0
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: setb %al
%tmp = and <8 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
%tmp3 = icmp eq i8 %tmp2, -1
; i16 element variant: true iff bit 0 is clear in all 8 lanes (mask tested
; against zero instead of all-ones).
define i1 @allzeros_v8i16_and1(<8 x i16> %arg) {
; SSE2-LABEL: allzeros_v8i16_and1:
; SSE2-NEXT: psllw $15, %xmm0
; SSE2-NEXT: packsswb %xmm0, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: testb %al, %al
; SSE2-NEXT: sete %al
; AVX-LABEL: allzeros_v8i16_and1:
; AVX-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: vpmovmskb %xmm0, %eax
; AVX-NEXT: testb %al, %al
; AVX-NEXT: sete %al
; KNL-LABEL: allzeros_v8i16_and1:
; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb %al, %al
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; SKX-LABEL: allzeros_v8i16_and1:
; SKX-NEXT: vptestmw {{.*}}(%rip), %xmm0, %k0
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: sete %al
%tmp = and <8 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
%tmp3 = icmp eq i8 %tmp2, 0
; 256-bit i16 variant: bit 0 of all 16 lanes must be set; halves are packed
; down to a 16-bit movemask on pre-AVX512 targets.
define i1 @allones_v16i16_and1(<16 x i16> %arg) {
; SSE2-LABEL: allones_v16i16_and1:
; SSE2-NEXT: psllw $15, %xmm1
; SSE2-NEXT: psllw $15, %xmm0
; SSE2-NEXT: packsswb %xmm1, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: cmpw $-1, %ax
; SSE2-NEXT: sete %al
; AVX1-LABEL: allones_v16i16_and1:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: cmpw $-1, %ax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX2-LABEL: allones_v16i16_and1:
; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: cmpw $-1, %ax
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; KNL-LABEL: allones_v16i16_and1:
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kortestw %k0, %k0
; KNL-NEXT: setb %al
; KNL-NEXT: vzeroupper
; SKX-LABEL: allones_v16i16_and1:
; SKX-NEXT: vptestmw {{.*}}(%rip), %ymm0, %k0
; SKX-NEXT: kortestw %k0, %k0
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
%tmp = and <16 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
%tmp3 = icmp eq i16 %tmp2, -1
; 512-bit i16 variant: bit 0 of all 32 lanes must be set; a 32-bit mask is
; assembled from two 16-bit halves on targets without AVX512BW.
define i1 @allones_v32i16_and1(<32 x i16> %arg) {
; SSE2-LABEL: allones_v32i16_and1:
; SSE2-NEXT: psllw $15, %xmm1
; SSE2-NEXT: psllw $15, %xmm0
; SSE2-NEXT: packsswb %xmm1, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: psllw $15, %xmm3
; SSE2-NEXT: psllw $15, %xmm2
; SSE2-NEXT: packsswb %xmm3, %xmm2
; SSE2-NEXT: pmovmskb %xmm2, %ecx
; SSE2-NEXT: shll $16, %ecx
; SSE2-NEXT: orl %eax, %ecx
; SSE2-NEXT: cmpl $-1, %ecx
; SSE2-NEXT: sete %al
; AVX1-LABEL: allones_v32i16_and1:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1
; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: shll $16, %ecx
; AVX1-NEXT: orl %eax, %ecx
; AVX1-NEXT: cmpl $-1, %ecx
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX2-LABEL: allones_v32i16_and1:
; AVX2-NEXT: vpsllw $15, %ymm1, %ymm1
; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: cmpl $-1, %eax
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; KNL-LABEL: allones_v32i16_and1:
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpsllw $15, %ymm1, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: shll $16, %ecx
; KNL-NEXT: orl %eax, %ecx
; KNL-NEXT: cmpl $-1, %ecx
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; SKX-LABEL: allones_v32i16_and1:
; SKX-NEXT: vptestmw {{.*}}(%rip), %zmm0, %k0
; SKX-NEXT: kortestd %k0, %k0
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
%tmp = and <32 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <32 x i1> %tmp1 to i32
%tmp3 = icmp eq i32 %tmp2, -1
; 512-bit i16 variant: true iff bit 0 is clear in all 32 lanes; combined mask
; tested for zero.
define i1 @allzeros_v32i16_and1(<32 x i16> %arg) {
; SSE2-LABEL: allzeros_v32i16_and1:
; SSE2-NEXT: psllw $15, %xmm1
; SSE2-NEXT: psllw $15, %xmm0
; SSE2-NEXT: packsswb %xmm1, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: psllw $15, %xmm3
; SSE2-NEXT: psllw $15, %xmm2
; SSE2-NEXT: packsswb %xmm3, %xmm2
; SSE2-NEXT: pmovmskb %xmm2, %ecx
; SSE2-NEXT: shll $16, %ecx
; SSE2-NEXT: orl %eax, %ecx
; SSE2-NEXT: sete %al
; AVX1-LABEL: allzeros_v32i16_and1:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1
; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: shll $16, %ecx
; AVX1-NEXT: orl %eax, %ecx
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX2-LABEL: allzeros_v32i16_and1:
; AVX2-NEXT: vpsllw $15, %ymm1, %ymm1
; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vpmovmskb %ymm0, %eax
; AVX2-NEXT: testl %eax, %eax
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; KNL-LABEL: allzeros_v32i16_and1:
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpsllw $15, %ymm1, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: shll $16, %ecx
; KNL-NEXT: orl %eax, %ecx
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; SKX-LABEL: allzeros_v32i16_and1:
; SKX-NEXT: vptestmw {{.*}}(%rip), %zmm0, %k0
; SKX-NEXT: kortestd %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
%tmp = and <32 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <32 x i1> %tmp1 to i32
%tmp3 = icmp eq i32 %tmp2, 0
; 256-bit i16 variant: true iff bit 0 is clear in all 16 lanes. Note this
; definition is placed after the v32i16 tests in the file.
define i1 @allzeros_v16i16_and1(<16 x i16> %arg) {
; SSE2-LABEL: allzeros_v16i16_and1:
; SSE2-NEXT: psllw $15, %xmm1
; SSE2-NEXT: psllw $15, %xmm0
; SSE2-NEXT: packsswb %xmm1, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: testw %ax, %ax
; SSE2-NEXT: sete %al
; AVX1-LABEL: allzeros_v16i16_and1:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: testw %ax, %ax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX2-LABEL: allzeros_v16i16_and1:
; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: testw %ax, %ax
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; KNL-LABEL: allzeros_v16i16_and1:
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kortestw %k0, %k0
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; SKX-LABEL: allzeros_v16i16_and1:
; SKX-NEXT: vptestmw {{.*}}(%rip), %ymm0, %k0
; SKX-NEXT: kortestw %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
%tmp = and <16 x i16> %arg, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
%tmp3 = icmp eq i16 %tmp2, 0
; i32 element variant: bit 0 of all 4 lanes must be set; the 4-bit movemask is
; compared against 15 (0b1111). AVX512 targets mask to the low 4 bits first.
define i1 @allones_v4i32_and1(<4 x i32> %arg) {
; SSE2-LABEL: allones_v4i32_and1:
; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: movmskps %xmm0, %eax
; SSE2-NEXT: cmpb $15, %al
; SSE2-NEXT: sete %al
; AVX-LABEL: allones_v4i32_and1:
; AVX-NEXT: vpslld $31, %xmm0, %xmm0
; AVX-NEXT: vmovmskps %xmm0, %eax
; AVX-NEXT: cmpb $15, %al
; AVX-NEXT: sete %al
; KNL-LABEL: allones_v4i32_and1:
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andb $15, %al
; KNL-NEXT: cmpb $15, %al
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; SKX-LABEL: allones_v4i32_and1:
; SKX-NEXT: vptestmd {{.*}}(%rip){1to4}, %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andb $15, %al
; SKX-NEXT: cmpb $15, %al
; SKX-NEXT: sete %al
%tmp = and <4 x i32> %arg, <i32 1, i32 1, i32 1, i32 1>
%tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <4 x i1> %tmp1 to i4
%tmp3 = icmp eq i4 %tmp2, -1
; i32 element variant: true iff bit 0 is clear in all 4 lanes; AVX512 targets
; test only the low 4 mask bits.
define i1 @allzeros_v4i32_and1(<4 x i32> %arg) {
; SSE2-LABEL: allzeros_v4i32_and1:
; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: movmskps %xmm0, %eax
; SSE2-NEXT: testb %al, %al
; SSE2-NEXT: sete %al
; AVX-LABEL: allzeros_v4i32_and1:
; AVX-NEXT: vpslld $31, %xmm0, %xmm0
; AVX-NEXT: vmovmskps %xmm0, %eax
; AVX-NEXT: testb %al, %al
; AVX-NEXT: sete %al
; KNL-LABEL: allzeros_v4i32_and1:
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb $15, %al
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; SKX-LABEL: allzeros_v4i32_and1:
; SKX-NEXT: vptestmd {{.*}}(%rip){1to4}, %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testb $15, %al
; SKX-NEXT: sete %al
%tmp = and <4 x i32> %arg, <i32 1, i32 1, i32 1, i32 1>
%tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <4 x i1> %tmp1 to i4
%tmp3 = icmp eq i4 %tmp2, 0
; 256-bit i32 variant: bit 0 of all 8 lanes must be set; the 8-bit mask is
; compared against all-ones.
define i1 @allones_v8i32_and1(<8 x i32> %arg) {
; SSE2-LABEL: allones_v8i32_and1:
; SSE2-NEXT: pslld $31, %xmm1
; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: packsswb %xmm0, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: cmpb $-1, %al
; SSE2-NEXT: sete %al
; AVX1-LABEL: allones_v8i32_and1:
; AVX1-NEXT: vpslld $31, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
; AVX1-NEXT: cmpb $-1, %al
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX2-LABEL: allones_v8i32_and1:
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vmovmskps %ymm0, %eax
; AVX2-NEXT: cmpb $-1, %al
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; KNL-LABEL: allones_v8i32_and1:
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: cmpb $-1, %al
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; SKX-LABEL: allones_v8i32_and1:
; SKX-NEXT: vptestmd {{.*}}(%rip){1to8}, %ymm0, %k0
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
%tmp = and <8 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
%tmp3 = icmp eq i8 %tmp2, -1
; 256-bit i32 variant: true iff bit 0 is clear in all 8 lanes (mask tested
; against zero).
define i1 @allzeros_v8i32_and1(<8 x i32> %arg) {
; SSE2-LABEL: allzeros_v8i32_and1:
; SSE2-NEXT: pslld $31, %xmm1
; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: packsswb %xmm0, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: testb %al, %al
; SSE2-NEXT: sete %al
; AVX1-LABEL: allzeros_v8i32_and1:
; AVX1-NEXT: vpslld $31, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
; AVX1-NEXT: testb %al, %al
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX2-LABEL: allzeros_v8i32_and1:
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vmovmskps %ymm0, %eax
; AVX2-NEXT: testb %al, %al
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; KNL-LABEL: allzeros_v8i32_and1:
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb %al, %al
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; SKX-LABEL: allzeros_v8i32_and1:
; SKX-NEXT: vptestmd {{.*}}(%rip){1to8}, %ymm0, %k0
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
%tmp = and <8 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <8 x i1> %tmp1 to i8
%tmp3 = icmp eq i8 %tmp2, 0
; 512-bit i32 variant: bit 0 of all 16 lanes must be set; both AVX512 targets
; use a broadcast vptestmd with kortestw.
define i1 @allones_v16i32_and1(<16 x i32> %arg) {
; SSE2-LABEL: allones_v16i32_and1:
; SSE2-NEXT: pslld $31, %xmm3
; SSE2-NEXT: pslld $31, %xmm2
; SSE2-NEXT: packssdw %xmm3, %xmm2
; SSE2-NEXT: pslld $31, %xmm1
; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: packsswb %xmm2, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: cmpw $-1, %ax
; SSE2-NEXT: sete %al
; AVX1-LABEL: allones_v16i32_and1:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: cmpw $-1, %ax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX2-LABEL: allones_v16i32_and1:
; AVX2-NEXT: vpslld $31, %ymm1, %ymm1
; AVX2-NEXT: vpsrad $31, %ymm1, %ymm1
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: cmpw $-1, %ax
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; KNL-LABEL: allones_v16i32_and1:
; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
; KNL-NEXT: kortestw %k0, %k0
; KNL-NEXT: setb %al
; KNL-NEXT: vzeroupper
; SKX-LABEL: allones_v16i32_and1:
; SKX-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
; SKX-NEXT: kortestw %k0, %k0
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
%tmp = and <16 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
%tmp3 = icmp eq i16 %tmp2, -1
; 512-bit i32 variant: true iff bit 0 is clear in all 16 lanes; AVX512 targets
; use broadcast vptestmd + kortestw with sete.
define i1 @allzeros_v16i32_and1(<16 x i32> %arg) {
; SSE2-LABEL: allzeros_v16i32_and1:
; SSE2-NEXT: pslld $31, %xmm3
; SSE2-NEXT: pslld $31, %xmm2
; SSE2-NEXT: packssdw %xmm3, %xmm2
; SSE2-NEXT: pslld $31, %xmm1
; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: packsswb %xmm2, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: testw %ax, %ax
; SSE2-NEXT: sete %al
; AVX1-LABEL: allzeros_v16i32_and1:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: testw %ax, %ax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX2-LABEL: allzeros_v16i32_and1:
; AVX2-NEXT: vpslld $31, %ymm1, %ymm1
; AVX2-NEXT: vpsrad $31, %ymm1, %ymm1
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: testw %ax, %ax
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; KNL-LABEL: allzeros_v16i32_and1:
; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
; KNL-NEXT: kortestw %k0, %k0
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; SKX-LABEL: allzeros_v16i32_and1:
; SKX-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
; SKX-NEXT: kortestw %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
%tmp = and <16 x i32> %arg, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
%tmp2 = bitcast <16 x i1> %tmp1 to i16
%tmp3 = icmp eq i16 %tmp2, 0
; i64 element variant: bit 0 of both lanes must be set; the 2-bit movemask is
; compared against 3 (0b11).
define i1 @allones_v2i64_and1(<2 x i64> %arg) {
; SSE2-LABEL: allones_v2i64_and1:
; SSE2-NEXT: psllq $63, %xmm0
; SSE2-NEXT: movmskpd %xmm0, %eax
; SSE2-NEXT: cmpb $3, %al
; SSE2-NEXT: sete %al
; AVX-LABEL: allones_v2i64_and1:
; AVX-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX-NEXT: vmovmskpd %xmm0, %eax
; AVX-NEXT: cmpb $3, %al
; AVX-NEXT: sete %al
; KNL-LABEL: allones_v2i64_and1:
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1]
; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: andb $3, %al
; KNL-NEXT: cmpb $3, %al
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; SKX-LABEL: allones_v2i64_and1:
; SKX-NEXT: vptestmq {{.*}}(%rip), %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andb $3, %al
; SKX-NEXT: cmpb $3, %al
; SKX-NEXT: sete %al
%tmp = and <2 x i64> %arg, <i64 1, i64 1>
%tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
%tmp2 = bitcast <2 x i1> %tmp1 to i2
%tmp3 = icmp eq i2 %tmp2, -1
; allzeros_v2i64_and1: returns true iff bit 0 is clear in both i64 lanes.
; IR pattern: and with 1, icmp ne 0, bitcast <2 x i1> -> i2, compare == 0.
; Expected lowering: psllq+movmskpd+testb (SSE2/AVX); vptestmq mask + testb $3 (KNL/SKX).
2330 define i1 @allzeros_v2i64_and1(<2 x i64> %arg) {
2331 ; SSE2-LABEL: allzeros_v2i64_and1:
2333 ; SSE2-NEXT: psllq $63, %xmm0
2334 ; SSE2-NEXT: movmskpd %xmm0, %eax
2335 ; SSE2-NEXT: testb %al, %al
2336 ; SSE2-NEXT: sete %al
2339 ; AVX-LABEL: allzeros_v2i64_and1:
2341 ; AVX-NEXT: vpsllq $63, %xmm0, %xmm0
2342 ; AVX-NEXT: vmovmskpd %xmm0, %eax
2343 ; AVX-NEXT: testb %al, %al
2344 ; AVX-NEXT: sete %al
2347 ; KNL-LABEL: allzeros_v2i64_and1:
2349 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2350 ; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1]
2351 ; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0
2352 ; KNL-NEXT: kmovw %k0, %eax
2353 ; KNL-NEXT: testb $3, %al
2354 ; KNL-NEXT: sete %al
2355 ; KNL-NEXT: vzeroupper
2358 ; SKX-LABEL: allzeros_v2i64_and1:
2360 ; SKX-NEXT: vptestmq {{.*}}(%rip), %xmm0, %k0
2361 ; SKX-NEXT: kmovd %k0, %eax
2362 ; SKX-NEXT: testb $3, %al
2363 ; SKX-NEXT: sete %al
2365 %tmp = and <2 x i64> %arg, <i64 1, i64 1>
2366 %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
2367 %tmp2 = bitcast <2 x i1> %tmp1 to i2
2368 %tmp3 = icmp eq i2 %tmp2, 0
; allones_v4i64_and1: true iff bit 0 is set in all four i64 lanes
; (bitcast of the <4 x i1> ne-zero mask to i4, compared == -1).
; Expected lowering: shift+movmsk+cmpb $15 on SSE2/AVX; vptestmq broadcast mask on AVX512.
2372 define i1 @allones_v4i64_and1(<4 x i64> %arg) {
2373 ; SSE2-LABEL: allones_v4i64_and1:
2375 ; SSE2-NEXT: psllq $63, %xmm1
2376 ; SSE2-NEXT: psllq $63, %xmm0
2377 ; SSE2-NEXT: packssdw %xmm1, %xmm0
2378 ; SSE2-NEXT: movmskps %xmm0, %eax
2379 ; SSE2-NEXT: cmpb $15, %al
2380 ; SSE2-NEXT: sete %al
2383 ; AVX1-LABEL: allones_v4i64_and1:
2385 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm1
2386 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2387 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
2388 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2389 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
2390 ; AVX1-NEXT: cmpb $15, %al
2391 ; AVX1-NEXT: sete %al
2392 ; AVX1-NEXT: vzeroupper
2395 ; AVX2-LABEL: allones_v4i64_and1:
2397 ; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
2398 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
2399 ; AVX2-NEXT: cmpb $15, %al
2400 ; AVX2-NEXT: sete %al
2401 ; AVX2-NEXT: vzeroupper
2404 ; KNL-LABEL: allones_v4i64_and1:
2406 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2407 ; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
2408 ; KNL-NEXT: kmovw %k0, %eax
2409 ; KNL-NEXT: andb $15, %al
2410 ; KNL-NEXT: cmpb $15, %al
2411 ; KNL-NEXT: sete %al
2412 ; KNL-NEXT: vzeroupper
2415 ; SKX-LABEL: allones_v4i64_and1:
2417 ; SKX-NEXT: vptestmq {{.*}}(%rip){1to4}, %ymm0, %k0
2418 ; SKX-NEXT: kmovd %k0, %eax
2419 ; SKX-NEXT: andb $15, %al
2420 ; SKX-NEXT: cmpb $15, %al
2421 ; SKX-NEXT: sete %al
2422 ; SKX-NEXT: vzeroupper
2424 %tmp = and <4 x i64> %arg, <i64 1, i64 1, i64 1, i64 1>
2425 %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
2426 %tmp2 = bitcast <4 x i1> %tmp1 to i4
2427 %tmp3 = icmp eq i4 %tmp2, -1
; allzeros_v4i64_and1: true iff bit 0 is clear in all four i64 lanes
; (bitcast of the <4 x i1> ne-zero mask to i4, compared == 0).
; Expected lowering: shift+movmsk+testb on SSE2/AVX; vptestmq broadcast mask + testb $15 on AVX512.
2431 define i1 @allzeros_v4i64_and1(<4 x i64> %arg) {
2432 ; SSE2-LABEL: allzeros_v4i64_and1:
2434 ; SSE2-NEXT: psllq $63, %xmm1
2435 ; SSE2-NEXT: psllq $63, %xmm0
2436 ; SSE2-NEXT: packssdw %xmm1, %xmm0
2437 ; SSE2-NEXT: movmskps %xmm0, %eax
2438 ; SSE2-NEXT: testb %al, %al
2439 ; SSE2-NEXT: sete %al
2442 ; AVX1-LABEL: allzeros_v4i64_and1:
2444 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm1
2445 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2446 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
2447 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2448 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
2449 ; AVX1-NEXT: testb %al, %al
2450 ; AVX1-NEXT: sete %al
2451 ; AVX1-NEXT: vzeroupper
2454 ; AVX2-LABEL: allzeros_v4i64_and1:
2456 ; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
2457 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
2458 ; AVX2-NEXT: testb %al, %al
2459 ; AVX2-NEXT: sete %al
2460 ; AVX2-NEXT: vzeroupper
2463 ; KNL-LABEL: allzeros_v4i64_and1:
2465 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
2466 ; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
2467 ; KNL-NEXT: kmovw %k0, %eax
2468 ; KNL-NEXT: testb $15, %al
2469 ; KNL-NEXT: sete %al
2470 ; KNL-NEXT: vzeroupper
2473 ; SKX-LABEL: allzeros_v4i64_and1:
2475 ; SKX-NEXT: vptestmq {{.*}}(%rip){1to4}, %ymm0, %k0
2476 ; SKX-NEXT: kmovd %k0, %eax
2477 ; SKX-NEXT: testb $15, %al
2478 ; SKX-NEXT: sete %al
2479 ; SKX-NEXT: vzeroupper
2481 %tmp = and <4 x i64> %arg, <i64 1, i64 1, i64 1, i64 1>
2482 %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
2483 %tmp2 = bitcast <4 x i1> %tmp1 to i4
2484 %tmp3 = icmp eq i4 %tmp2, 0
; allones_v8i64_and1: true iff bit 0 is set in all eight i64 lanes
; (bitcast of the <8 x i1> ne-zero mask to i8, compared == -1).
; Expected lowering: pack+movmsk+cmpb $-1 on SSE2/AVX; vptestmq{1to8} + kortestb/setb on SKX.
2488 define i1 @allones_v8i64_and1(<8 x i64> %arg) {
2489 ; SSE2-LABEL: allones_v8i64_and1:
2491 ; SSE2-NEXT: psllq $63, %xmm3
2492 ; SSE2-NEXT: psllq $63, %xmm2
2493 ; SSE2-NEXT: packssdw %xmm3, %xmm2
2494 ; SSE2-NEXT: psllq $63, %xmm1
2495 ; SSE2-NEXT: psllq $63, %xmm0
2496 ; SSE2-NEXT: packssdw %xmm1, %xmm0
2497 ; SSE2-NEXT: packssdw %xmm2, %xmm0
2498 ; SSE2-NEXT: packsswb %xmm0, %xmm0
2499 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2500 ; SSE2-NEXT: cmpb $-1, %al
2501 ; SSE2-NEXT: sete %al
2504 ; AVX1-LABEL: allones_v8i64_and1:
2506 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2507 ; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
2508 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
2509 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
2510 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
2511 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
2512 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2513 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2514 ; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
2515 ; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1
2516 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2517 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2518 ; AVX1-NEXT: vmovmskps %ymm0, %eax
2519 ; AVX1-NEXT: cmpb $-1, %al
2520 ; AVX1-NEXT: sete %al
2521 ; AVX1-NEXT: vzeroupper
2524 ; AVX2-LABEL: allones_v8i64_and1:
2526 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
2527 ; AVX2-NEXT: vpsllq $63, %ymm1, %ymm1
2528 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm1
2529 ; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
2530 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0
2531 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
2532 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2533 ; AVX2-NEXT: vmovmskps %ymm0, %eax
2534 ; AVX2-NEXT: cmpb $-1, %al
2535 ; AVX2-NEXT: sete %al
2536 ; AVX2-NEXT: vzeroupper
2539 ; KNL-LABEL: allones_v8i64_and1:
2541 ; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
2542 ; KNL-NEXT: kmovw %k0, %eax
2543 ; KNL-NEXT: cmpb $-1, %al
2544 ; KNL-NEXT: sete %al
2545 ; KNL-NEXT: vzeroupper
2548 ; SKX-LABEL: allones_v8i64_and1:
2550 ; SKX-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
2551 ; SKX-NEXT: kortestb %k0, %k0
2552 ; SKX-NEXT: setb %al
2553 ; SKX-NEXT: vzeroupper
2555 %tmp = and <8 x i64> %arg, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
2556 %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
2557 %tmp2 = bitcast <8 x i1> %tmp1 to i8
2558 %tmp3 = icmp eq i8 %tmp2, -1
; allzeros_v8i64_and1: true iff bit 0 is clear in all eight i64 lanes
; (bitcast of the <8 x i1> ne-zero mask to i8, compared == 0).
; Expected lowering: pack+movmsk+testb on SSE2/AVX; vptestmq{1to8} + kortestb/sete on SKX.
2562 define i1 @allzeros_v8i64_and1(<8 x i64> %arg) {
2563 ; SSE2-LABEL: allzeros_v8i64_and1:
2565 ; SSE2-NEXT: psllq $63, %xmm3
2566 ; SSE2-NEXT: psllq $63, %xmm2
2567 ; SSE2-NEXT: packssdw %xmm3, %xmm2
2568 ; SSE2-NEXT: psllq $63, %xmm1
2569 ; SSE2-NEXT: psllq $63, %xmm0
2570 ; SSE2-NEXT: packssdw %xmm1, %xmm0
2571 ; SSE2-NEXT: packssdw %xmm2, %xmm0
2572 ; SSE2-NEXT: packsswb %xmm0, %xmm0
2573 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2574 ; SSE2-NEXT: testb %al, %al
2575 ; SSE2-NEXT: sete %al
2578 ; AVX1-LABEL: allzeros_v8i64_and1:
2580 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2581 ; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
2582 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
2583 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
2584 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
2585 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
2586 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
2587 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2588 ; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
2589 ; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1
2590 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2591 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2592 ; AVX1-NEXT: vmovmskps %ymm0, %eax
2593 ; AVX1-NEXT: testb %al, %al
2594 ; AVX1-NEXT: sete %al
2595 ; AVX1-NEXT: vzeroupper
2598 ; AVX2-LABEL: allzeros_v8i64_and1:
2600 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
2601 ; AVX2-NEXT: vpsllq $63, %ymm1, %ymm1
2602 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm1
2603 ; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
2604 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0
2605 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
2606 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
2607 ; AVX2-NEXT: vmovmskps %ymm0, %eax
2608 ; AVX2-NEXT: testb %al, %al
2609 ; AVX2-NEXT: sete %al
2610 ; AVX2-NEXT: vzeroupper
2613 ; KNL-LABEL: allzeros_v8i64_and1:
2615 ; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
2616 ; KNL-NEXT: kmovw %k0, %eax
2617 ; KNL-NEXT: testb %al, %al
2618 ; KNL-NEXT: sete %al
2619 ; KNL-NEXT: vzeroupper
2622 ; SKX-LABEL: allzeros_v8i64_and1:
2624 ; SKX-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
2625 ; SKX-NEXT: kortestb %k0, %k0
2626 ; SKX-NEXT: sete %al
2627 ; SKX-NEXT: vzeroupper
2629 %tmp = and <8 x i64> %arg, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
2630 %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
2631 %tmp2 = bitcast <8 x i1> %tmp1 to i8
2632 %tmp3 = icmp eq i8 %tmp2, 0
; allones_v16i8_and4: true iff bit 2 (mask 4) is set in all 16 i8 lanes.
; Bit 2 is moved to the sign bit via psllw $5, then pmovmskb+cmpw $-1;
; SKX uses vptestmb + kortestw/setb (CF set when mask is all-ones).
2636 define i1 @allones_v16i8_and4(<16 x i8> %arg) {
2637 ; SSE2-LABEL: allones_v16i8_and4:
2639 ; SSE2-NEXT: psllw $5, %xmm0
2640 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2641 ; SSE2-NEXT: cmpw $-1, %ax
2642 ; SSE2-NEXT: sete %al
2645 ; AVX-LABEL: allones_v16i8_and4:
2647 ; AVX-NEXT: vpsllw $5, %xmm0, %xmm0
2648 ; AVX-NEXT: vpmovmskb %xmm0, %eax
2649 ; AVX-NEXT: cmpw $-1, %ax
2650 ; AVX-NEXT: sete %al
2653 ; KNL-LABEL: allones_v16i8_and4:
2655 ; KNL-NEXT: vpsllw $5, %xmm0, %xmm0
2656 ; KNL-NEXT: vpmovmskb %xmm0, %eax
2657 ; KNL-NEXT: cmpw $-1, %ax
2658 ; KNL-NEXT: sete %al
2661 ; SKX-LABEL: allones_v16i8_and4:
2663 ; SKX-NEXT: vptestmb {{.*}}(%rip), %xmm0, %k0
2664 ; SKX-NEXT: kortestw %k0, %k0
2665 ; SKX-NEXT: setb %al
2667 %tmp = and <16 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2668 %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
2669 %tmp2 = bitcast <16 x i1> %tmp1 to i16
2670 %tmp3 = icmp eq i16 %tmp2, -1
; allzeros_v16i8_and4: true iff bit 2 (mask 4) is clear in all 16 i8 lanes.
; Same psllw $5 + pmovmskb trick as the allones variant, but tested against zero;
; SKX uses vptestmb + kortestw/sete (ZF set when mask is all-zero).
2674 define i1 @allzeros_v16i8_and4(<16 x i8> %arg) {
2675 ; SSE2-LABEL: allzeros_v16i8_and4:
2677 ; SSE2-NEXT: psllw $5, %xmm0
2678 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2679 ; SSE2-NEXT: testw %ax, %ax
2680 ; SSE2-NEXT: sete %al
2683 ; AVX-LABEL: allzeros_v16i8_and4:
2685 ; AVX-NEXT: vpsllw $5, %xmm0, %xmm0
2686 ; AVX-NEXT: vpmovmskb %xmm0, %eax
2687 ; AVX-NEXT: testw %ax, %ax
2688 ; AVX-NEXT: sete %al
2691 ; KNL-LABEL: allzeros_v16i8_and4:
2693 ; KNL-NEXT: vpsllw $5, %xmm0, %xmm0
2694 ; KNL-NEXT: vpmovmskb %xmm0, %eax
2695 ; KNL-NEXT: testw %ax, %ax
2696 ; KNL-NEXT: sete %al
2699 ; SKX-LABEL: allzeros_v16i8_and4:
2701 ; SKX-NEXT: vptestmb {{.*}}(%rip), %xmm0, %k0
2702 ; SKX-NEXT: kortestw %k0, %k0
2703 ; SKX-NEXT: sete %al
2705 %tmp = and <16 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2706 %tmp1 = icmp ne <16 x i8> %tmp, zeroinitializer
2707 %tmp2 = bitcast <16 x i1> %tmp1 to i16
2708 %tmp3 = icmp eq i16 %tmp2, 0
; allones_v32i8_and4: true iff bit 2 (mask 4) is set in all 32 i8 lanes
; (bitcast of the <32 x i1> mask to i32, compared == -1).
; SSE2/AVX1 combine two 16-bit pmovmskb results via shll/orl; AVX2 uses a
; single 256-bit pmovmskb; SKX uses vptestmb + kortestd/setb.
2712 define i1 @allones_v32i8_and4(<32 x i8> %arg) {
2713 ; SSE2-LABEL: allones_v32i8_and4:
2715 ; SSE2-NEXT: psllw $5, %xmm0
2716 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2717 ; SSE2-NEXT: psllw $5, %xmm1
2718 ; SSE2-NEXT: pmovmskb %xmm1, %ecx
2719 ; SSE2-NEXT: shll $16, %ecx
2720 ; SSE2-NEXT: orl %eax, %ecx
2721 ; SSE2-NEXT: cmpl $-1, %ecx
2722 ; SSE2-NEXT: sete %al
2725 ; AVX1-LABEL: allones_v32i8_and4:
2727 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm1
2728 ; AVX1-NEXT: vpmovmskb %xmm1, %eax
2729 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2730 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
2731 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
2732 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
2733 ; AVX1-NEXT: shll $16, %ecx
2734 ; AVX1-NEXT: orl %eax, %ecx
2735 ; AVX1-NEXT: cmpl $-1, %ecx
2736 ; AVX1-NEXT: sete %al
2737 ; AVX1-NEXT: vzeroupper
2740 ; AVX2-LABEL: allones_v32i8_and4:
2742 ; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
2743 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
2744 ; AVX2-NEXT: cmpl $-1, %eax
2745 ; AVX2-NEXT: sete %al
2746 ; AVX2-NEXT: vzeroupper
2749 ; KNL-LABEL: allones_v32i8_and4:
2751 ; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
2752 ; KNL-NEXT: vpmovmskb %ymm0, %eax
2753 ; KNL-NEXT: cmpl $-1, %eax
2754 ; KNL-NEXT: sete %al
2755 ; KNL-NEXT: vzeroupper
2758 ; SKX-LABEL: allones_v32i8_and4:
2760 ; SKX-NEXT: vptestmb {{.*}}(%rip), %ymm0, %k0
2761 ; SKX-NEXT: kortestd %k0, %k0
2762 ; SKX-NEXT: setb %al
2763 ; SKX-NEXT: vzeroupper
2765 %tmp = and <32 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2766 %tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
2767 %tmp2 = bitcast <32 x i1> %tmp1 to i32
2768 %tmp3 = icmp eq i32 %tmp2, -1
; allzeros_v32i8_and4: true iff bit 2 (mask 4) is clear in all 32 i8 lanes
; (bitcast of the <32 x i1> mask to i32, compared == 0).
; Note SSE2/AVX1 branch on the flags of the final orl directly (no explicit test);
; SKX uses vptestmb + kortestd/sete.
2772 define i1 @allzeros_v32i8_and4(<32 x i8> %arg) {
2773 ; SSE2-LABEL: allzeros_v32i8_and4:
2775 ; SSE2-NEXT: psllw $5, %xmm0
2776 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2777 ; SSE2-NEXT: psllw $5, %xmm1
2778 ; SSE2-NEXT: pmovmskb %xmm1, %ecx
2779 ; SSE2-NEXT: shll $16, %ecx
2780 ; SSE2-NEXT: orl %eax, %ecx
2781 ; SSE2-NEXT: sete %al
2784 ; AVX1-LABEL: allzeros_v32i8_and4:
2786 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm1
2787 ; AVX1-NEXT: vpmovmskb %xmm1, %eax
2788 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2789 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
2790 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
2791 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
2792 ; AVX1-NEXT: shll $16, %ecx
2793 ; AVX1-NEXT: orl %eax, %ecx
2794 ; AVX1-NEXT: sete %al
2795 ; AVX1-NEXT: vzeroupper
2798 ; AVX2-LABEL: allzeros_v32i8_and4:
2800 ; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
2801 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
2802 ; AVX2-NEXT: testl %eax, %eax
2803 ; AVX2-NEXT: sete %al
2804 ; AVX2-NEXT: vzeroupper
2807 ; KNL-LABEL: allzeros_v32i8_and4:
2809 ; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
2810 ; KNL-NEXT: vpmovmskb %ymm0, %eax
2811 ; KNL-NEXT: testl %eax, %eax
2812 ; KNL-NEXT: sete %al
2813 ; KNL-NEXT: vzeroupper
2816 ; SKX-LABEL: allzeros_v32i8_and4:
2818 ; SKX-NEXT: vptestmb {{.*}}(%rip), %ymm0, %k0
2819 ; SKX-NEXT: kortestd %k0, %k0
2820 ; SKX-NEXT: sete %al
2821 ; SKX-NEXT: vzeroupper
2823 %tmp = and <32 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2824 %tmp1 = icmp ne <32 x i8> %tmp, zeroinitializer
2825 %tmp2 = bitcast <32 x i1> %tmp1 to i32
2826 %tmp3 = icmp eq i32 %tmp2, 0
; allones_v64i8_and4: true iff bit 2 (mask 4) is set in all 64 i8 lanes
; (bitcast of the <64 x i1> mask to i64, compared == -1).
; Pre-AVX512 targets assemble a 64-bit mask from 16/32-bit pmovmskb chunks via
; shll/shlq+or and compare against -1; SKX uses vptestmb (zmm) + kortestq/setb.
2830 define i1 @allones_v64i8_and4(<64 x i8> %arg) {
2831 ; SSE2-LABEL: allones_v64i8_and4:
2833 ; SSE2-NEXT: psllw $5, %xmm0
2834 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2835 ; SSE2-NEXT: psllw $5, %xmm1
2836 ; SSE2-NEXT: pmovmskb %xmm1, %ecx
2837 ; SSE2-NEXT: shll $16, %ecx
2838 ; SSE2-NEXT: orl %eax, %ecx
2839 ; SSE2-NEXT: psllw $5, %xmm2
2840 ; SSE2-NEXT: pmovmskb %xmm2, %eax
2841 ; SSE2-NEXT: psllw $5, %xmm3
2842 ; SSE2-NEXT: pmovmskb %xmm3, %edx
2843 ; SSE2-NEXT: shll $16, %edx
2844 ; SSE2-NEXT: orl %eax, %edx
2845 ; SSE2-NEXT: shlq $32, %rdx
2846 ; SSE2-NEXT: orq %rcx, %rdx
2847 ; SSE2-NEXT: cmpq $-1, %rdx
2848 ; SSE2-NEXT: sete %al
2851 ; AVX1-LABEL: allones_v64i8_and4:
2853 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm2
2854 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224]
2855 ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
2856 ; AVX1-NEXT: vpmovmskb %xmm2, %eax
2857 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2858 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
2859 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
2860 ; AVX1-NEXT: shll $16, %ecx
2861 ; AVX1-NEXT: orl %eax, %ecx
2862 ; AVX1-NEXT: vpsllw $5, %xmm1, %xmm0
2863 ; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
2864 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2865 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
2866 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
2867 ; AVX1-NEXT: vpmovmskb %xmm0, %edx
2868 ; AVX1-NEXT: shll $16, %edx
2869 ; AVX1-NEXT: orl %eax, %edx
2870 ; AVX1-NEXT: shlq $32, %rdx
2871 ; AVX1-NEXT: orq %rcx, %rdx
2872 ; AVX1-NEXT: cmpq $-1, %rdx
2873 ; AVX1-NEXT: sete %al
2874 ; AVX1-NEXT: vzeroupper
2877 ; AVX2-LABEL: allones_v64i8_and4:
2879 ; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
2880 ; AVX2-NEXT: vpmovmskb %ymm1, %eax
2881 ; AVX2-NEXT: shlq $32, %rax
2882 ; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
2883 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
2884 ; AVX2-NEXT: orq %rax, %rcx
2885 ; AVX2-NEXT: cmpq $-1, %rcx
2886 ; AVX2-NEXT: sete %al
2887 ; AVX2-NEXT: vzeroupper
2890 ; KNL-LABEL: allones_v64i8_and4:
2892 ; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
2893 ; KNL-NEXT: vpsllw $5, %ymm1, %ymm1
2894 ; KNL-NEXT: vpmovmskb %ymm1, %eax
2895 ; KNL-NEXT: shlq $32, %rax
2896 ; KNL-NEXT: vpmovmskb %ymm0, %ecx
2897 ; KNL-NEXT: orq %rax, %rcx
2898 ; KNL-NEXT: cmpq $-1, %rcx
2899 ; KNL-NEXT: sete %al
2900 ; KNL-NEXT: vzeroupper
2903 ; SKX-LABEL: allones_v64i8_and4:
2905 ; SKX-NEXT: vptestmb {{.*}}(%rip), %zmm0, %k0
2906 ; SKX-NEXT: kortestq %k0, %k0
2907 ; SKX-NEXT: setb %al
2908 ; SKX-NEXT: vzeroupper
2910 %tmp = and <64 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2911 %tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
2912 %tmp2 = bitcast <64 x i1> %tmp1 to i64
2913 %tmp3 = icmp eq i64 %tmp2, -1
; allzeros_v64i8_and4: true iff bit 2 (mask 4) is clear in all 64 i8 lanes
; (bitcast of the <64 x i1> mask to i64, compared == 0).
; Same mask-assembly as allones_v64i8_and4, but the final orq's flags feed sete
; directly (no cmpq); SKX uses vptestmb (zmm) + kortestq/sete.
2917 define i1 @allzeros_v64i8_and4(<64 x i8> %arg) {
2918 ; SSE2-LABEL: allzeros_v64i8_and4:
2920 ; SSE2-NEXT: psllw $5, %xmm0
2921 ; SSE2-NEXT: pmovmskb %xmm0, %eax
2922 ; SSE2-NEXT: psllw $5, %xmm1
2923 ; SSE2-NEXT: pmovmskb %xmm1, %ecx
2924 ; SSE2-NEXT: shll $16, %ecx
2925 ; SSE2-NEXT: orl %eax, %ecx
2926 ; SSE2-NEXT: psllw $5, %xmm2
2927 ; SSE2-NEXT: pmovmskb %xmm2, %eax
2928 ; SSE2-NEXT: psllw $5, %xmm3
2929 ; SSE2-NEXT: pmovmskb %xmm3, %edx
2930 ; SSE2-NEXT: shll $16, %edx
2931 ; SSE2-NEXT: orl %eax, %edx
2932 ; SSE2-NEXT: shlq $32, %rdx
2933 ; SSE2-NEXT: orq %rcx, %rdx
2934 ; SSE2-NEXT: sete %al
2937 ; AVX1-LABEL: allzeros_v64i8_and4:
2939 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm2
2940 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224]
2941 ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
2942 ; AVX1-NEXT: vpmovmskb %xmm2, %eax
2943 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2944 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
2945 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
2946 ; AVX1-NEXT: shll $16, %ecx
2947 ; AVX1-NEXT: orl %eax, %ecx
2948 ; AVX1-NEXT: vpsllw $5, %xmm1, %xmm0
2949 ; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
2950 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
2951 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
2952 ; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
2953 ; AVX1-NEXT: vpmovmskb %xmm0, %edx
2954 ; AVX1-NEXT: shll $16, %edx
2955 ; AVX1-NEXT: orl %eax, %edx
2956 ; AVX1-NEXT: shlq $32, %rdx
2957 ; AVX1-NEXT: orq %rcx, %rdx
2958 ; AVX1-NEXT: sete %al
2959 ; AVX1-NEXT: vzeroupper
2962 ; AVX2-LABEL: allzeros_v64i8_and4:
2964 ; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
2965 ; AVX2-NEXT: vpmovmskb %ymm1, %eax
2966 ; AVX2-NEXT: shlq $32, %rax
2967 ; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
2968 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
2969 ; AVX2-NEXT: orq %rax, %rcx
2970 ; AVX2-NEXT: sete %al
2971 ; AVX2-NEXT: vzeroupper
2974 ; KNL-LABEL: allzeros_v64i8_and4:
2976 ; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
2977 ; KNL-NEXT: vpsllw $5, %ymm1, %ymm1
2978 ; KNL-NEXT: vpmovmskb %ymm1, %eax
2979 ; KNL-NEXT: shlq $32, %rax
2980 ; KNL-NEXT: vpmovmskb %ymm0, %ecx
2981 ; KNL-NEXT: orq %rax, %rcx
2982 ; KNL-NEXT: sete %al
2983 ; KNL-NEXT: vzeroupper
2986 ; SKX-LABEL: allzeros_v64i8_and4:
2988 ; SKX-NEXT: vptestmb {{.*}}(%rip), %zmm0, %k0
2989 ; SKX-NEXT: kortestq %k0, %k0
2990 ; SKX-NEXT: sete %al
2991 ; SKX-NEXT: vzeroupper
2993 %tmp = and <64 x i8> %arg, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
2994 %tmp1 = icmp ne <64 x i8> %tmp, zeroinitializer
2995 %tmp2 = bitcast <64 x i1> %tmp1 to i64
2996 %tmp3 = icmp eq i64 %tmp2, 0
; allones_v8i16_and4: true iff bit 2 (mask 4) is set in all 8 i16 lanes
; (bitcast of the <8 x i1> mask to i8, compared == -1).
; Bit 2 is shifted to the i16 sign bit with psllw $13, narrowed with packsswb,
; then pmovmskb+cmpb $-1; SKX uses vptestmw + kortestb/setb.
3000 define i1 @allones_v8i16_and4(<8 x i16> %arg) {
3001 ; SSE2-LABEL: allones_v8i16_and4:
3003 ; SSE2-NEXT: psllw $13, %xmm0
3004 ; SSE2-NEXT: packsswb %xmm0, %xmm0
3005 ; SSE2-NEXT: pmovmskb %xmm0, %eax
3006 ; SSE2-NEXT: cmpb $-1, %al
3007 ; SSE2-NEXT: sete %al
3010 ; AVX-LABEL: allones_v8i16_and4:
3012 ; AVX-NEXT: vpsllw $13, %xmm0, %xmm0
3013 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
3014 ; AVX-NEXT: vpmovmskb %xmm0, %eax
3015 ; AVX-NEXT: cmpb $-1, %al
3016 ; AVX-NEXT: sete %al
3019 ; KNL-LABEL: allones_v8i16_and4:
3021 ; KNL-NEXT: vpsllw $13, %xmm0, %xmm0
3022 ; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
3023 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
3024 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
3025 ; KNL-NEXT: kmovw %k0, %eax
3026 ; KNL-NEXT: cmpb $-1, %al
3027 ; KNL-NEXT: sete %al
3028 ; KNL-NEXT: vzeroupper
3031 ; SKX-LABEL: allones_v8i16_and4:
3033 ; SKX-NEXT: vptestmw {{.*}}(%rip), %xmm0, %k0
3034 ; SKX-NEXT: kortestb %k0, %k0
3035 ; SKX-NEXT: setb %al
3037 %tmp = and <8 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
3038 %tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
3039 %tmp2 = bitcast <8 x i1> %tmp1 to i8
3040 %tmp3 = icmp eq i8 %tmp2, -1
; allzeros_v8i16_and4: true iff bit 2 (mask 4) is clear in all 8 i16 lanes
; (bitcast of the <8 x i1> mask to i8, compared == 0).
; Same psllw $13 + packsswb + pmovmskb trick, tested against zero;
; SKX uses vptestmw + kortestb/sete.
3044 define i1 @allzeros_v8i16_and4(<8 x i16> %arg) {
3045 ; SSE2-LABEL: allzeros_v8i16_and4:
3047 ; SSE2-NEXT: psllw $13, %xmm0
3048 ; SSE2-NEXT: packsswb %xmm0, %xmm0
3049 ; SSE2-NEXT: pmovmskb %xmm0, %eax
3050 ; SSE2-NEXT: testb %al, %al
3051 ; SSE2-NEXT: sete %al
3054 ; AVX-LABEL: allzeros_v8i16_and4:
3056 ; AVX-NEXT: vpsllw $13, %xmm0, %xmm0
3057 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
3058 ; AVX-NEXT: vpmovmskb %xmm0, %eax
3059 ; AVX-NEXT: testb %al, %al
3060 ; AVX-NEXT: sete %al
3063 ; KNL-LABEL: allzeros_v8i16_and4:
3065 ; KNL-NEXT: vpsllw $13, %xmm0, %xmm0
3066 ; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
3067 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
3068 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
3069 ; KNL-NEXT: kmovw %k0, %eax
3070 ; KNL-NEXT: testb %al, %al
3071 ; KNL-NEXT: sete %al
3072 ; KNL-NEXT: vzeroupper
3075 ; SKX-LABEL: allzeros_v8i16_and4:
3077 ; SKX-NEXT: vptestmw {{.*}}(%rip), %xmm0, %k0
3078 ; SKX-NEXT: kortestb %k0, %k0
3079 ; SKX-NEXT: sete %al
3081 %tmp = and <8 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
3082 %tmp1 = icmp ne <8 x i16> %tmp, zeroinitializer
3083 %tmp2 = bitcast <8 x i1> %tmp1 to i8
3084 %tmp3 = icmp eq i8 %tmp2, 0
; allones_v16i16_and4: true iff bit 2 (mask 4) is set in all 16 i16 lanes
; (bitcast of the <16 x i1> mask to i16, compared == -1).
; Lowered via psllw $13 + packsswb + pmovmskb + cmpw $-1 pre-AVX512;
; KNL widens to <16 x i32> for vptestmd; SKX uses vptestmw + kortestw/setb.
3088 define i1 @allones_v16i16_and4(<16 x i16> %arg) {
3089 ; SSE2-LABEL: allones_v16i16_and4:
3091 ; SSE2-NEXT: psllw $13, %xmm1
3092 ; SSE2-NEXT: psllw $13, %xmm0
3093 ; SSE2-NEXT: packsswb %xmm1, %xmm0
3094 ; SSE2-NEXT: pmovmskb %xmm0, %eax
3095 ; SSE2-NEXT: cmpw $-1, %ax
3096 ; SSE2-NEXT: sete %al
3099 ; AVX1-LABEL: allones_v16i16_and4:
3101 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3102 ; AVX1-NEXT: vpsllw $13, %xmm1, %xmm1
3103 ; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
3104 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3105 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
3106 ; AVX1-NEXT: cmpw $-1, %ax
3107 ; AVX1-NEXT: sete %al
3108 ; AVX1-NEXT: vzeroupper
3111 ; AVX2-LABEL: allones_v16i16_and4:
3113 ; AVX2-NEXT: vpsllw $13, %ymm0, %ymm0
3114 ; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0
3115 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3116 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3117 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
3118 ; AVX2-NEXT: cmpw $-1, %ax
3119 ; AVX2-NEXT: sete %al
3120 ; AVX2-NEXT: vzeroupper
3123 ; KNL-LABEL: allones_v16i16_and4:
3125 ; KNL-NEXT: vpsllw $13, %ymm0, %ymm0
3126 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
3127 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
3128 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
3129 ; KNL-NEXT: kortestw %k0, %k0
3130 ; KNL-NEXT: setb %al
3131 ; KNL-NEXT: vzeroupper
3134 ; SKX-LABEL: allones_v16i16_and4:
3136 ; SKX-NEXT: vptestmw {{.*}}(%rip), %ymm0, %k0
3137 ; SKX-NEXT: kortestw %k0, %k0
3138 ; SKX-NEXT: setb %al
3139 ; SKX-NEXT: vzeroupper
3141 %tmp = and <16 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
3142 %tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
3143 %tmp2 = bitcast <16 x i1> %tmp1 to i16
3144 %tmp3 = icmp eq i16 %tmp2, -1
; allones_v32i16_and4: true iff bit 2 (mask 4) is set in all 32 i16 lanes
; (bitcast of the <32 x i1> mask to i32, compared == -1).
; Pre-AVX512BW targets (including KNL) assemble a 32-bit mask from two 16-bit
; halves via shll/orl + cmpl $-1; SKX uses vptestmw (zmm) + kortestd/setb.
3148 define i1 @allones_v32i16_and4(<32 x i16> %arg) {
3149 ; SSE2-LABEL: allones_v32i16_and4:
3151 ; SSE2-NEXT: psllw $13, %xmm1
3152 ; SSE2-NEXT: psllw $13, %xmm0
3153 ; SSE2-NEXT: packsswb %xmm1, %xmm0
3154 ; SSE2-NEXT: pmovmskb %xmm0, %eax
3155 ; SSE2-NEXT: psllw $13, %xmm3
3156 ; SSE2-NEXT: psllw $13, %xmm2
3157 ; SSE2-NEXT: packsswb %xmm3, %xmm2
3158 ; SSE2-NEXT: pmovmskb %xmm2, %ecx
3159 ; SSE2-NEXT: shll $16, %ecx
3160 ; SSE2-NEXT: orl %eax, %ecx
3161 ; SSE2-NEXT: cmpl $-1, %ecx
3162 ; SSE2-NEXT: sete %al
3165 ; AVX1-LABEL: allones_v32i16_and4:
3167 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3168 ; AVX1-NEXT: vpsllw $13, %xmm2, %xmm2
3169 ; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
3170 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
3171 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
3172 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
3173 ; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
3174 ; AVX1-NEXT: vpsllw $13, %xmm1, %xmm1
3175 ; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
3176 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
3177 ; AVX1-NEXT: shll $16, %ecx
3178 ; AVX1-NEXT: orl %eax, %ecx
3179 ; AVX1-NEXT: cmpl $-1, %ecx
3180 ; AVX1-NEXT: sete %al
3181 ; AVX1-NEXT: vzeroupper
3184 ; AVX2-LABEL: allones_v32i16_and4:
3186 ; AVX2-NEXT: vpsllw $13, %ymm1, %ymm1
3187 ; AVX2-NEXT: vpsllw $13, %ymm0, %ymm0
3188 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
3189 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
3190 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
3191 ; AVX2-NEXT: cmpl $-1, %eax
3192 ; AVX2-NEXT: sete %al
3193 ; AVX2-NEXT: vzeroupper
3196 ; KNL-LABEL: allones_v32i16_and4:
3198 ; KNL-NEXT: vpsllw $13, %ymm0, %ymm0
3199 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
3200 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
3201 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
3202 ; KNL-NEXT: kmovw %k0, %eax
3203 ; KNL-NEXT: vpsllw $13, %ymm1, %ymm0
3204 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
3205 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
3206 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
3207 ; KNL-NEXT: kmovw %k0, %ecx
3208 ; KNL-NEXT: shll $16, %ecx
3209 ; KNL-NEXT: orl %eax, %ecx
3210 ; KNL-NEXT: cmpl $-1, %ecx
3211 ; KNL-NEXT: sete %al
3212 ; KNL-NEXT: vzeroupper
3215 ; SKX-LABEL: allones_v32i16_and4:
3217 ; SKX-NEXT: vptestmw {{.*}}(%rip), %zmm0, %k0
3218 ; SKX-NEXT: kortestd %k0, %k0
3219 ; SKX-NEXT: setb %al
3220 ; SKX-NEXT: vzeroupper
3222 %tmp = and <32 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
3223 %tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
3224 %tmp2 = bitcast <32 x i1> %tmp1 to i32
3225 %tmp3 = icmp eq i32 %tmp2, -1
; allzeros_v32i16_and4: true iff bit 2 (mask 4) is clear in all 32 i16 lanes
; (bitcast of the <32 x i1> mask to i32, compared == 0).
; Same mask assembly as allones_v32i16_and4 but the final orl's flags feed sete
; directly; SKX uses vptestmw (zmm) + kortestd/sete.
3229 define i1 @allzeros_v32i16_and4(<32 x i16> %arg) {
3230 ; SSE2-LABEL: allzeros_v32i16_and4:
3232 ; SSE2-NEXT: psllw $13, %xmm1
3233 ; SSE2-NEXT: psllw $13, %xmm0
3234 ; SSE2-NEXT: packsswb %xmm1, %xmm0
3235 ; SSE2-NEXT: pmovmskb %xmm0, %eax
3236 ; SSE2-NEXT: psllw $13, %xmm3
3237 ; SSE2-NEXT: psllw $13, %xmm2
3238 ; SSE2-NEXT: packsswb %xmm3, %xmm2
3239 ; SSE2-NEXT: pmovmskb %xmm2, %ecx
3240 ; SSE2-NEXT: shll $16, %ecx
3241 ; SSE2-NEXT: orl %eax, %ecx
3242 ; SSE2-NEXT: sete %al
3245 ; AVX1-LABEL: allzeros_v32i16_and4:
3247 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3248 ; AVX1-NEXT: vpsllw $13, %xmm2, %xmm2
3249 ; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
3250 ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
3251 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
3252 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
3253 ; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
3254 ; AVX1-NEXT: vpsllw $13, %xmm1, %xmm1
3255 ; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
3256 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
3257 ; AVX1-NEXT: shll $16, %ecx
3258 ; AVX1-NEXT: orl %eax, %ecx
3259 ; AVX1-NEXT: sete %al
3260 ; AVX1-NEXT: vzeroupper
3263 ; AVX2-LABEL: allzeros_v32i16_and4:
3265 ; AVX2-NEXT: vpsllw $13, %ymm1, %ymm1
3266 ; AVX2-NEXT: vpsllw $13, %ymm0, %ymm0
3267 ; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
3268 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
3269 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
3270 ; AVX2-NEXT: testl %eax, %eax
3271 ; AVX2-NEXT: sete %al
3272 ; AVX2-NEXT: vzeroupper
3275 ; KNL-LABEL: allzeros_v32i16_and4:
3277 ; KNL-NEXT: vpsllw $13, %ymm0, %ymm0
3278 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
3279 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
3280 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
3281 ; KNL-NEXT: kmovw %k0, %eax
3282 ; KNL-NEXT: vpsllw $13, %ymm1, %ymm0
3283 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
3284 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
3285 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
3286 ; KNL-NEXT: kmovw %k0, %ecx
3287 ; KNL-NEXT: shll $16, %ecx
3288 ; KNL-NEXT: orl %eax, %ecx
3289 ; KNL-NEXT: sete %al
3290 ; KNL-NEXT: vzeroupper
3293 ; SKX-LABEL: allzeros_v32i16_and4:
3295 ; SKX-NEXT: vptestmw {{.*}}(%rip), %zmm0, %k0
3296 ; SKX-NEXT: kortestd %k0, %k0
3297 ; SKX-NEXT: sete %al
3298 ; SKX-NEXT: vzeroupper
3300 %tmp = and <32 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
3301 %tmp1 = icmp ne <32 x i16> %tmp, zeroinitializer
3302 %tmp2 = bitcast <32 x i1> %tmp1 to i32
3303 %tmp3 = icmp eq i32 %tmp2, 0
; allzeros_v16i16_and4: true iff bit 2 (mask 4) is clear in all 16 i16 lanes
; (bitcast of the <16 x i1> mask to i16, compared == 0).
; NOTE(review): this allzeros companion appears after allones_v32i16_and4
; rather than next to allones_v16i16_and4 — ordering is as autogenerated.
3307 define i1 @allzeros_v16i16_and4(<16 x i16> %arg) {
3308 ; SSE2-LABEL: allzeros_v16i16_and4:
3310 ; SSE2-NEXT: psllw $13, %xmm1
3311 ; SSE2-NEXT: psllw $13, %xmm0
3312 ; SSE2-NEXT: packsswb %xmm1, %xmm0
3313 ; SSE2-NEXT: pmovmskb %xmm0, %eax
3314 ; SSE2-NEXT: testw %ax, %ax
3315 ; SSE2-NEXT: sete %al
3318 ; AVX1-LABEL: allzeros_v16i16_and4:
3320 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3321 ; AVX1-NEXT: vpsllw $13, %xmm1, %xmm1
3322 ; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
3323 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3324 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
3325 ; AVX1-NEXT: testw %ax, %ax
3326 ; AVX1-NEXT: sete %al
3327 ; AVX1-NEXT: vzeroupper
3330 ; AVX2-LABEL: allzeros_v16i16_and4:
3332 ; AVX2-NEXT: vpsllw $13, %ymm0, %ymm0
3333 ; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0
3334 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3335 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3336 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
3337 ; AVX2-NEXT: testw %ax, %ax
3338 ; AVX2-NEXT: sete %al
3339 ; AVX2-NEXT: vzeroupper
3342 ; KNL-LABEL: allzeros_v16i16_and4:
3344 ; KNL-NEXT: vpsllw $13, %ymm0, %ymm0
3345 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
3346 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
3347 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
3348 ; KNL-NEXT: kortestw %k0, %k0
3349 ; KNL-NEXT: sete %al
3350 ; KNL-NEXT: vzeroupper
3353 ; SKX-LABEL: allzeros_v16i16_and4:
3355 ; SKX-NEXT: vptestmw {{.*}}(%rip), %ymm0, %k0
3356 ; SKX-NEXT: kortestw %k0, %k0
3357 ; SKX-NEXT: sete %al
3358 ; SKX-NEXT: vzeroupper
3360 %tmp = and <16 x i16> %arg, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
3361 %tmp1 = icmp ne <16 x i16> %tmp, zeroinitializer
3362 %tmp2 = bitcast <16 x i1> %tmp1 to i16
3363 %tmp3 = icmp eq i16 %tmp2, 0
; Tests "every element of (<4 x i32> %arg & 4) is nonzero": <4 x i1> mask
; bitcast to i4 compared eq -1.  SSE/AVX shift bit 2 to the sign bit and
; compare the movmskps result against 0b1111 (15).
3367 define i1 @allones_v4i32_and4(<4 x i32> %arg) {
3368 ; SSE2-LABEL: allones_v4i32_and4:
3370 ; SSE2-NEXT: pslld $29, %xmm0
3371 ; SSE2-NEXT: movmskps %xmm0, %eax
3372 ; SSE2-NEXT: cmpb $15, %al
3373 ; SSE2-NEXT: sete %al
3376 ; AVX-LABEL: allones_v4i32_and4:
3378 ; AVX-NEXT: vpslld $29, %xmm0, %xmm0
3379 ; AVX-NEXT: vmovmskps %xmm0, %eax
3380 ; AVX-NEXT: cmpb $15, %al
3381 ; AVX-NEXT: sete %al
3384 ; KNL-LABEL: allones_v4i32_and4:
3386 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3387 ; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
3388 ; KNL-NEXT: kmovw %k0, %eax
3389 ; KNL-NEXT: andb $15, %al
3390 ; KNL-NEXT: cmpb $15, %al
3391 ; KNL-NEXT: sete %al
3392 ; KNL-NEXT: vzeroupper
3395 ; SKX-LABEL: allones_v4i32_and4:
3397 ; SKX-NEXT: vptestmd {{.*}}(%rip){1to4}, %xmm0, %k0
3398 ; SKX-NEXT: kmovd %k0, %eax
3399 ; SKX-NEXT: andb $15, %al
3400 ; SKX-NEXT: cmpb $15, %al
3401 ; SKX-NEXT: sete %al
3403 %tmp = and <4 x i32> %arg, <i32 4, i32 4, i32 4, i32 4>
3404 %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
3405 %tmp2 = bitcast <4 x i1> %tmp1 to i4
3406 %tmp3 = icmp eq i4 %tmp2, -1
; Tests "no element of (<4 x i32> %arg & 4) is nonzero": <4 x i1> mask
; bitcast to i4 compared eq 0.  Same lowering as the allones variant but
; with testb/sete instead of cmpb $15.
3410 define i1 @allzeros_v4i32_and4(<4 x i32> %arg) {
3411 ; SSE2-LABEL: allzeros_v4i32_and4:
3413 ; SSE2-NEXT: pslld $29, %xmm0
3414 ; SSE2-NEXT: movmskps %xmm0, %eax
3415 ; SSE2-NEXT: testb %al, %al
3416 ; SSE2-NEXT: sete %al
3419 ; AVX-LABEL: allzeros_v4i32_and4:
3421 ; AVX-NEXT: vpslld $29, %xmm0, %xmm0
3422 ; AVX-NEXT: vmovmskps %xmm0, %eax
3423 ; AVX-NEXT: testb %al, %al
3424 ; AVX-NEXT: sete %al
3427 ; KNL-LABEL: allzeros_v4i32_and4:
3429 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3430 ; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
3431 ; KNL-NEXT: kmovw %k0, %eax
3432 ; KNL-NEXT: testb $15, %al
3433 ; KNL-NEXT: sete %al
3434 ; KNL-NEXT: vzeroupper
3437 ; SKX-LABEL: allzeros_v4i32_and4:
3439 ; SKX-NEXT: vptestmd {{.*}}(%rip){1to4}, %xmm0, %k0
3440 ; SKX-NEXT: kmovd %k0, %eax
3441 ; SKX-NEXT: testb $15, %al
3442 ; SKX-NEXT: sete %al
3444 %tmp = and <4 x i32> %arg, <i32 4, i32 4, i32 4, i32 4>
3445 %tmp1 = icmp ne <4 x i32> %tmp, zeroinitializer
3446 %tmp2 = bitcast <4 x i1> %tmp1 to i4
3447 %tmp3 = icmp eq i4 %tmp2, 0
; Tests "every element of (<8 x i32> %arg & 4) is nonzero": <8 x i1> mask
; bitcast to i8 compared eq -1.  SKX folds the all-ones check into
; kortestb + setb (CF set when the mask is all-ones).
3451 define i1 @allones_v8i32_and4(<8 x i32> %arg) {
3452 ; SSE2-LABEL: allones_v8i32_and4:
3454 ; SSE2-NEXT: pslld $29, %xmm1
3455 ; SSE2-NEXT: pslld $29, %xmm0
3456 ; SSE2-NEXT: packssdw %xmm1, %xmm0
3457 ; SSE2-NEXT: packsswb %xmm0, %xmm0
3458 ; SSE2-NEXT: pmovmskb %xmm0, %eax
3459 ; SSE2-NEXT: cmpb $-1, %al
3460 ; SSE2-NEXT: sete %al
3463 ; AVX1-LABEL: allones_v8i32_and4:
3465 ; AVX1-NEXT: vpslld $29, %xmm0, %xmm1
3466 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3467 ; AVX1-NEXT: vpslld $29, %xmm0, %xmm0
3468 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3469 ; AVX1-NEXT: vmovmskps %ymm0, %eax
3470 ; AVX1-NEXT: cmpb $-1, %al
3471 ; AVX1-NEXT: sete %al
3472 ; AVX1-NEXT: vzeroupper
3475 ; AVX2-LABEL: allones_v8i32_and4:
3477 ; AVX2-NEXT: vpslld $29, %ymm0, %ymm0
3478 ; AVX2-NEXT: vmovmskps %ymm0, %eax
3479 ; AVX2-NEXT: cmpb $-1, %al
3480 ; AVX2-NEXT: sete %al
3481 ; AVX2-NEXT: vzeroupper
3484 ; KNL-LABEL: allones_v8i32_and4:
3486 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3487 ; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
3488 ; KNL-NEXT: kmovw %k0, %eax
3489 ; KNL-NEXT: cmpb $-1, %al
3490 ; KNL-NEXT: sete %al
3491 ; KNL-NEXT: vzeroupper
3494 ; SKX-LABEL: allones_v8i32_and4:
3496 ; SKX-NEXT: vptestmd {{.*}}(%rip){1to8}, %ymm0, %k0
3497 ; SKX-NEXT: kortestb %k0, %k0
3498 ; SKX-NEXT: setb %al
3499 ; SKX-NEXT: vzeroupper
3501 %tmp = and <8 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
3502 %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
3503 %tmp2 = bitcast <8 x i1> %tmp1 to i8
3504 %tmp3 = icmp eq i8 %tmp2, -1
; Tests "no element of (<8 x i32> %arg & 4) is nonzero": <8 x i1> mask
; bitcast to i8 compared eq 0.  SKX lowers to vptestmd + kortestb + sete.
3508 define i1 @allzeros_v8i32_and4(<8 x i32> %arg) {
3509 ; SSE2-LABEL: allzeros_v8i32_and4:
3511 ; SSE2-NEXT: pslld $29, %xmm1
3512 ; SSE2-NEXT: pslld $29, %xmm0
3513 ; SSE2-NEXT: packssdw %xmm1, %xmm0
3514 ; SSE2-NEXT: packsswb %xmm0, %xmm0
3515 ; SSE2-NEXT: pmovmskb %xmm0, %eax
3516 ; SSE2-NEXT: testb %al, %al
3517 ; SSE2-NEXT: sete %al
3520 ; AVX1-LABEL: allzeros_v8i32_and4:
3522 ; AVX1-NEXT: vpslld $29, %xmm0, %xmm1
3523 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3524 ; AVX1-NEXT: vpslld $29, %xmm0, %xmm0
3525 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3526 ; AVX1-NEXT: vmovmskps %ymm0, %eax
3527 ; AVX1-NEXT: testb %al, %al
3528 ; AVX1-NEXT: sete %al
3529 ; AVX1-NEXT: vzeroupper
3532 ; AVX2-LABEL: allzeros_v8i32_and4:
3534 ; AVX2-NEXT: vpslld $29, %ymm0, %ymm0
3535 ; AVX2-NEXT: vmovmskps %ymm0, %eax
3536 ; AVX2-NEXT: testb %al, %al
3537 ; AVX2-NEXT: sete %al
3538 ; AVX2-NEXT: vzeroupper
3541 ; KNL-LABEL: allzeros_v8i32_and4:
3543 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3544 ; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
3545 ; KNL-NEXT: kmovw %k0, %eax
3546 ; KNL-NEXT: testb %al, %al
3547 ; KNL-NEXT: sete %al
3548 ; KNL-NEXT: vzeroupper
3551 ; SKX-LABEL: allzeros_v8i32_and4:
3553 ; SKX-NEXT: vptestmd {{.*}}(%rip){1to8}, %ymm0, %k0
3554 ; SKX-NEXT: kortestb %k0, %k0
3555 ; SKX-NEXT: sete %al
3556 ; SKX-NEXT: vzeroupper
3558 %tmp = and <8 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
3559 %tmp1 = icmp ne <8 x i32> %tmp, zeroinitializer
3560 %tmp2 = bitcast <8 x i1> %tmp1 to i8
3561 %tmp3 = icmp eq i8 %tmp2, 0
; Tests "every element of (<16 x i32> %arg & 4) is nonzero": <16 x i1>
; mask bitcast to i16 compared eq -1.  AVX512 (both KNL and SKX) lowers
; to a single broadcast vptestmd + kortestw + setb.
3565 define i1 @allones_v16i32_and4(<16 x i32> %arg) {
3566 ; SSE2-LABEL: allones_v16i32_and4:
3568 ; SSE2-NEXT: pslld $29, %xmm3
3569 ; SSE2-NEXT: pslld $29, %xmm2
3570 ; SSE2-NEXT: packssdw %xmm3, %xmm2
3571 ; SSE2-NEXT: pslld $29, %xmm1
3572 ; SSE2-NEXT: pslld $29, %xmm0
3573 ; SSE2-NEXT: packssdw %xmm1, %xmm0
3574 ; SSE2-NEXT: packsswb %xmm2, %xmm0
3575 ; SSE2-NEXT: pmovmskb %xmm0, %eax
3576 ; SSE2-NEXT: cmpw $-1, %ax
3577 ; SSE2-NEXT: sete %al
3580 ; AVX1-LABEL: allones_v16i32_and4:
3582 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3583 ; AVX1-NEXT: vpslld $29, %xmm2, %xmm2
3584 ; AVX1-NEXT: vpslld $29, %xmm1, %xmm1
3585 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
3586 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3587 ; AVX1-NEXT: vpslld $29, %xmm2, %xmm2
3588 ; AVX1-NEXT: vpslld $29, %xmm0, %xmm0
3589 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
3590 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3591 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
3592 ; AVX1-NEXT: cmpw $-1, %ax
3593 ; AVX1-NEXT: sete %al
3594 ; AVX1-NEXT: vzeroupper
3597 ; AVX2-LABEL: allones_v16i32_and4:
3599 ; AVX2-NEXT: vpslld $29, %ymm1, %ymm1
3600 ; AVX2-NEXT: vpsrad $31, %ymm1, %ymm1
3601 ; AVX2-NEXT: vpslld $29, %ymm0, %ymm0
3602 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
3603 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
3604 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
3605 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3606 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3607 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
3608 ; AVX2-NEXT: cmpw $-1, %ax
3609 ; AVX2-NEXT: sete %al
3610 ; AVX2-NEXT: vzeroupper
3613 ; KNL-LABEL: allones_v16i32_and4:
3615 ; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
3616 ; KNL-NEXT: kortestw %k0, %k0
3617 ; KNL-NEXT: setb %al
3618 ; KNL-NEXT: vzeroupper
3621 ; SKX-LABEL: allones_v16i32_and4:
3623 ; SKX-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
3624 ; SKX-NEXT: kortestw %k0, %k0
3625 ; SKX-NEXT: setb %al
3626 ; SKX-NEXT: vzeroupper
3628 %tmp = and <16 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
3629 %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
3630 %tmp2 = bitcast <16 x i1> %tmp1 to i16
3631 %tmp3 = icmp eq i16 %tmp2, -1
; Tests "no element of (<16 x i32> %arg & 4) is nonzero": <16 x i1> mask
; bitcast to i16 compared eq 0.  AVX512 lowers to vptestmd + kortestw +
; sete on both KNL and SKX.
3635 define i1 @allzeros_v16i32_and4(<16 x i32> %arg) {
3636 ; SSE2-LABEL: allzeros_v16i32_and4:
3638 ; SSE2-NEXT: pslld $29, %xmm3
3639 ; SSE2-NEXT: pslld $29, %xmm2
3640 ; SSE2-NEXT: packssdw %xmm3, %xmm2
3641 ; SSE2-NEXT: pslld $29, %xmm1
3642 ; SSE2-NEXT: pslld $29, %xmm0
3643 ; SSE2-NEXT: packssdw %xmm1, %xmm0
3644 ; SSE2-NEXT: packsswb %xmm2, %xmm0
3645 ; SSE2-NEXT: pmovmskb %xmm0, %eax
3646 ; SSE2-NEXT: testw %ax, %ax
3647 ; SSE2-NEXT: sete %al
3650 ; AVX1-LABEL: allzeros_v16i32_and4:
3652 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3653 ; AVX1-NEXT: vpslld $29, %xmm2, %xmm2
3654 ; AVX1-NEXT: vpslld $29, %xmm1, %xmm1
3655 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
3656 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3657 ; AVX1-NEXT: vpslld $29, %xmm2, %xmm2
3658 ; AVX1-NEXT: vpslld $29, %xmm0, %xmm0
3659 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
3660 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3661 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
3662 ; AVX1-NEXT: testw %ax, %ax
3663 ; AVX1-NEXT: sete %al
3664 ; AVX1-NEXT: vzeroupper
3667 ; AVX2-LABEL: allzeros_v16i32_and4:
3669 ; AVX2-NEXT: vpslld $29, %ymm1, %ymm1
3670 ; AVX2-NEXT: vpsrad $31, %ymm1, %ymm1
3671 ; AVX2-NEXT: vpslld $29, %ymm0, %ymm0
3672 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
3673 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
3674 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
3675 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3676 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
3677 ; AVX2-NEXT: vpmovmskb %xmm0, %eax
3678 ; AVX2-NEXT: testw %ax, %ax
3679 ; AVX2-NEXT: sete %al
3680 ; AVX2-NEXT: vzeroupper
3683 ; KNL-LABEL: allzeros_v16i32_and4:
3685 ; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
3686 ; KNL-NEXT: kortestw %k0, %k0
3687 ; KNL-NEXT: sete %al
3688 ; KNL-NEXT: vzeroupper
3691 ; SKX-LABEL: allzeros_v16i32_and4:
3693 ; SKX-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
3694 ; SKX-NEXT: kortestw %k0, %k0
3695 ; SKX-NEXT: sete %al
3696 ; SKX-NEXT: vzeroupper
3698 %tmp = and <16 x i32> %arg, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
3699 %tmp1 = icmp ne <16 x i32> %tmp, zeroinitializer
3700 %tmp2 = bitcast <16 x i1> %tmp1 to i16
3701 %tmp3 = icmp eq i16 %tmp2, 0
; Tests "both elements of (<2 x i64> %arg & 4) are nonzero": <2 x i1>
; mask bitcast to i2 compared eq -1.  SSE/AVX shift bit 2 to the sign
; bit and compare the 2-bit movmskpd result against 3.
3705 define i1 @allones_v2i64_and4(<2 x i64> %arg) {
3706 ; SSE2-LABEL: allones_v2i64_and4:
3708 ; SSE2-NEXT: psllq $61, %xmm0
3709 ; SSE2-NEXT: movmskpd %xmm0, %eax
3710 ; SSE2-NEXT: cmpb $3, %al
3711 ; SSE2-NEXT: sete %al
3714 ; AVX-LABEL: allones_v2i64_and4:
3716 ; AVX-NEXT: vpsllq $61, %xmm0, %xmm0
3717 ; AVX-NEXT: vmovmskpd %xmm0, %eax
3718 ; AVX-NEXT: cmpb $3, %al
3719 ; AVX-NEXT: sete %al
3722 ; KNL-LABEL: allones_v2i64_and4:
3724 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3725 ; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4]
3726 ; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0
3727 ; KNL-NEXT: kmovw %k0, %eax
3728 ; KNL-NEXT: andb $3, %al
3729 ; KNL-NEXT: cmpb $3, %al
3730 ; KNL-NEXT: sete %al
3731 ; KNL-NEXT: vzeroupper
3734 ; SKX-LABEL: allones_v2i64_and4:
3736 ; SKX-NEXT: vptestmq {{.*}}(%rip), %xmm0, %k0
3737 ; SKX-NEXT: kmovd %k0, %eax
3738 ; SKX-NEXT: andb $3, %al
3739 ; SKX-NEXT: cmpb $3, %al
3740 ; SKX-NEXT: sete %al
3742 %tmp = and <2 x i64> %arg, <i64 4, i64 4>
3743 %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
3744 %tmp2 = bitcast <2 x i1> %tmp1 to i2
3745 %tmp3 = icmp eq i2 %tmp2, -1
; Tests "no element of (<2 x i64> %arg & 4) is nonzero": <2 x i1> mask
; bitcast to i2 compared eq 0.
3749 define i1 @allzeros_v2i64_and4(<2 x i64> %arg) {
3750 ; SSE2-LABEL: allzeros_v2i64_and4:
3752 ; SSE2-NEXT: psllq $61, %xmm0
3753 ; SSE2-NEXT: movmskpd %xmm0, %eax
3754 ; SSE2-NEXT: testb %al, %al
3755 ; SSE2-NEXT: sete %al
3758 ; AVX-LABEL: allzeros_v2i64_and4:
3760 ; AVX-NEXT: vpsllq $61, %xmm0, %xmm0
3761 ; AVX-NEXT: vmovmskpd %xmm0, %eax
3762 ; AVX-NEXT: testb %al, %al
3763 ; AVX-NEXT: sete %al
3766 ; KNL-LABEL: allzeros_v2i64_and4:
3768 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3769 ; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4]
3770 ; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0
3771 ; KNL-NEXT: kmovw %k0, %eax
3772 ; KNL-NEXT: testb $3, %al
3773 ; KNL-NEXT: sete %al
3774 ; KNL-NEXT: vzeroupper
3777 ; SKX-LABEL: allzeros_v2i64_and4:
3779 ; SKX-NEXT: vptestmq {{.*}}(%rip), %xmm0, %k0
3780 ; SKX-NEXT: kmovd %k0, %eax
3781 ; SKX-NEXT: testb $3, %al
3782 ; SKX-NEXT: sete %al
3784 %tmp = and <2 x i64> %arg, <i64 4, i64 4>
3785 %tmp1 = icmp ne <2 x i64> %tmp, zeroinitializer
3786 %tmp2 = bitcast <2 x i1> %tmp1 to i2
3787 %tmp3 = icmp eq i2 %tmp2, 0
; Tests "every element of (<4 x i64> %arg & 4) is nonzero": <4 x i1>
; mask bitcast to i4 compared eq -1; 4-bit movmskpd-style result
; compared against 15 on pre-AVX512 targets.
3791 define i1 @allones_v4i64_and4(<4 x i64> %arg) {
3792 ; SSE2-LABEL: allones_v4i64_and4:
3794 ; SSE2-NEXT: psllq $61, %xmm1
3795 ; SSE2-NEXT: psllq $61, %xmm0
3796 ; SSE2-NEXT: packssdw %xmm1, %xmm0
3797 ; SSE2-NEXT: movmskps %xmm0, %eax
3798 ; SSE2-NEXT: cmpb $15, %al
3799 ; SSE2-NEXT: sete %al
3802 ; AVX1-LABEL: allones_v4i64_and4:
3804 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm1
3805 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3806 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
3807 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3808 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
3809 ; AVX1-NEXT: cmpb $15, %al
3810 ; AVX1-NEXT: sete %al
3811 ; AVX1-NEXT: vzeroupper
3814 ; AVX2-LABEL: allones_v4i64_and4:
3816 ; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
3817 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
3818 ; AVX2-NEXT: cmpb $15, %al
3819 ; AVX2-NEXT: sete %al
3820 ; AVX2-NEXT: vzeroupper
3823 ; KNL-LABEL: allones_v4i64_and4:
3825 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3826 ; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
3827 ; KNL-NEXT: kmovw %k0, %eax
3828 ; KNL-NEXT: andb $15, %al
3829 ; KNL-NEXT: cmpb $15, %al
3830 ; KNL-NEXT: sete %al
3831 ; KNL-NEXT: vzeroupper
3834 ; SKX-LABEL: allones_v4i64_and4:
3836 ; SKX-NEXT: vptestmq {{.*}}(%rip){1to4}, %ymm0, %k0
3837 ; SKX-NEXT: kmovd %k0, %eax
3838 ; SKX-NEXT: andb $15, %al
3839 ; SKX-NEXT: cmpb $15, %al
3840 ; SKX-NEXT: sete %al
3841 ; SKX-NEXT: vzeroupper
3843 %tmp = and <4 x i64> %arg, <i64 4, i64 4, i64 4, i64 4>
3844 %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
3845 %tmp2 = bitcast <4 x i1> %tmp1 to i4
3846 %tmp3 = icmp eq i4 %tmp2, -1
; Tests "no element of (<4 x i64> %arg & 4) is nonzero": <4 x i1> mask
; bitcast to i4 compared eq 0.
3850 define i1 @allzeros_v4i64_and4(<4 x i64> %arg) {
3851 ; SSE2-LABEL: allzeros_v4i64_and4:
3853 ; SSE2-NEXT: psllq $61, %xmm1
3854 ; SSE2-NEXT: psllq $61, %xmm0
3855 ; SSE2-NEXT: packssdw %xmm1, %xmm0
3856 ; SSE2-NEXT: movmskps %xmm0, %eax
3857 ; SSE2-NEXT: testb %al, %al
3858 ; SSE2-NEXT: sete %al
3861 ; AVX1-LABEL: allzeros_v4i64_and4:
3863 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm1
3864 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3865 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
3866 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3867 ; AVX1-NEXT: vmovmskpd %ymm0, %eax
3868 ; AVX1-NEXT: testb %al, %al
3869 ; AVX1-NEXT: sete %al
3870 ; AVX1-NEXT: vzeroupper
3873 ; AVX2-LABEL: allzeros_v4i64_and4:
3875 ; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
3876 ; AVX2-NEXT: vmovmskpd %ymm0, %eax
3877 ; AVX2-NEXT: testb %al, %al
3878 ; AVX2-NEXT: sete %al
3879 ; AVX2-NEXT: vzeroupper
3882 ; KNL-LABEL: allzeros_v4i64_and4:
3884 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
3885 ; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
3886 ; KNL-NEXT: kmovw %k0, %eax
3887 ; KNL-NEXT: testb $15, %al
3888 ; KNL-NEXT: sete %al
3889 ; KNL-NEXT: vzeroupper
3892 ; SKX-LABEL: allzeros_v4i64_and4:
3894 ; SKX-NEXT: vptestmq {{.*}}(%rip){1to4}, %ymm0, %k0
3895 ; SKX-NEXT: kmovd %k0, %eax
3896 ; SKX-NEXT: testb $15, %al
3897 ; SKX-NEXT: sete %al
3898 ; SKX-NEXT: vzeroupper
3900 %tmp = and <4 x i64> %arg, <i64 4, i64 4, i64 4, i64 4>
3901 %tmp1 = icmp ne <4 x i64> %tmp, zeroinitializer
3902 %tmp2 = bitcast <4 x i1> %tmp1 to i4
3903 %tmp3 = icmp eq i4 %tmp2, 0
; Tests "every element of (<8 x i64> %arg & 4) is nonzero": <8 x i1>
; mask bitcast to i8 compared eq -1.  AVX1/AVX2 need explicit vpcmpgtq
; against zero because psllq has no packing sign-compare shortcut at
; 64-bit granularity; SKX folds to vptestmq + kortestb + setb.
3907 define i1 @allones_v8i64_and4(<8 x i64> %arg) {
3908 ; SSE2-LABEL: allones_v8i64_and4:
3910 ; SSE2-NEXT: psllq $61, %xmm3
3911 ; SSE2-NEXT: psllq $61, %xmm2
3912 ; SSE2-NEXT: packssdw %xmm3, %xmm2
3913 ; SSE2-NEXT: psllq $61, %xmm1
3914 ; SSE2-NEXT: psllq $61, %xmm0
3915 ; SSE2-NEXT: packssdw %xmm1, %xmm0
3916 ; SSE2-NEXT: packssdw %xmm2, %xmm0
3917 ; SSE2-NEXT: packsswb %xmm0, %xmm0
3918 ; SSE2-NEXT: pmovmskb %xmm0, %eax
3919 ; SSE2-NEXT: cmpb $-1, %al
3920 ; SSE2-NEXT: sete %al
3923 ; AVX1-LABEL: allones_v8i64_and4:
3925 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
3926 ; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
3927 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
3928 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
3929 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
3930 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
3931 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
3932 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3933 ; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
3934 ; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1
3935 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
3936 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3937 ; AVX1-NEXT: vmovmskps %ymm0, %eax
3938 ; AVX1-NEXT: cmpb $-1, %al
3939 ; AVX1-NEXT: sete %al
3940 ; AVX1-NEXT: vzeroupper
3943 ; AVX2-LABEL: allones_v8i64_and4:
3945 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
3946 ; AVX2-NEXT: vpsllq $61, %ymm1, %ymm1
3947 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm1
3948 ; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
3949 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0
3950 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
3951 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
3952 ; AVX2-NEXT: vmovmskps %ymm0, %eax
3953 ; AVX2-NEXT: cmpb $-1, %al
3954 ; AVX2-NEXT: sete %al
3955 ; AVX2-NEXT: vzeroupper
3958 ; KNL-LABEL: allones_v8i64_and4:
3960 ; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
3961 ; KNL-NEXT: kmovw %k0, %eax
3962 ; KNL-NEXT: cmpb $-1, %al
3963 ; KNL-NEXT: sete %al
3964 ; KNL-NEXT: vzeroupper
3967 ; SKX-LABEL: allones_v8i64_and4:
3969 ; SKX-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
3970 ; SKX-NEXT: kortestb %k0, %k0
3971 ; SKX-NEXT: setb %al
3972 ; SKX-NEXT: vzeroupper
3974 %tmp = and <8 x i64> %arg, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
3975 %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
3976 %tmp2 = bitcast <8 x i1> %tmp1 to i8
3977 %tmp3 = icmp eq i8 %tmp2, -1
; Tests "no element of (<8 x i64> %arg & 4) is nonzero": <8 x i1> mask
; bitcast to i8 compared eq 0.  Same lowering shape as allones_v8i64_and4
; with the final compare replaced by testb/kortestb + sete.
3981 define i1 @allzeros_v8i64_and4(<8 x i64> %arg) {
3982 ; SSE2-LABEL: allzeros_v8i64_and4:
3984 ; SSE2-NEXT: psllq $61, %xmm3
3985 ; SSE2-NEXT: psllq $61, %xmm2
3986 ; SSE2-NEXT: packssdw %xmm3, %xmm2
3987 ; SSE2-NEXT: psllq $61, %xmm1
3988 ; SSE2-NEXT: psllq $61, %xmm0
3989 ; SSE2-NEXT: packssdw %xmm1, %xmm0
3990 ; SSE2-NEXT: packssdw %xmm2, %xmm0
3991 ; SSE2-NEXT: packsswb %xmm0, %xmm0
3992 ; SSE2-NEXT: pmovmskb %xmm0, %eax
3993 ; SSE2-NEXT: testb %al, %al
3994 ; SSE2-NEXT: sete %al
3997 ; AVX1-LABEL: allzeros_v8i64_and4:
3999 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
4000 ; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
4001 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
4002 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
4003 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
4004 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
4005 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
4006 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4007 ; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
4008 ; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1
4009 ; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
4010 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4011 ; AVX1-NEXT: vmovmskps %ymm0, %eax
4012 ; AVX1-NEXT: testb %al, %al
4013 ; AVX1-NEXT: sete %al
4014 ; AVX1-NEXT: vzeroupper
4017 ; AVX2-LABEL: allzeros_v8i64_and4:
4019 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
4020 ; AVX2-NEXT: vpsllq $61, %ymm1, %ymm1
4021 ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm1
4022 ; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
4023 ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0
4024 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
4025 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4026 ; AVX2-NEXT: vmovmskps %ymm0, %eax
4027 ; AVX2-NEXT: testb %al, %al
4028 ; AVX2-NEXT: sete %al
4029 ; AVX2-NEXT: vzeroupper
4032 ; KNL-LABEL: allzeros_v8i64_and4:
4034 ; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
4035 ; KNL-NEXT: kmovw %k0, %eax
4036 ; KNL-NEXT: testb %al, %al
4037 ; KNL-NEXT: sete %al
4038 ; KNL-NEXT: vzeroupper
4041 ; SKX-LABEL: allzeros_v8i64_and4:
4043 ; SKX-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
4044 ; SKX-NEXT: kortestb %k0, %k0
4045 ; SKX-NEXT: sete %al
4046 ; SKX-NEXT: vzeroupper
4048 %tmp = and <8 x i64> %arg, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
4049 %tmp1 = icmp ne <8 x i64> %tmp, zeroinitializer
4050 %tmp2 = bitcast <8 x i1> %tmp1 to i8
4051 %tmp3 = icmp eq i8 %tmp2, 0
4055 ; The below are IR patterns that should directly represent the behavior of a
4056 ; MOVMSK instruction.
; IR idiom for MOVMSKPD: sign-bit test (icmp slt 0) of the <2 x i64>
; bitcast of a <2 x double>, mask bitcast to i2 and zext to i32.
; SSE/AVX should select a single movmskpd.
4058 define i32 @movmskpd(<2 x double> %x) {
4059 ; SSE2-LABEL: movmskpd:
4061 ; SSE2-NEXT: movmskpd %xmm0, %eax
4064 ; AVX-LABEL: movmskpd:
4066 ; AVX-NEXT: vmovmskpd %xmm0, %eax
4069 ; KNL-LABEL: movmskpd:
4071 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4072 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
4073 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
4074 ; KNL-NEXT: kmovw %k0, %eax
4075 ; KNL-NEXT: andl $3, %eax
4076 ; KNL-NEXT: vzeroupper
4079 ; SKX-LABEL: movmskpd:
4081 ; SKX-NEXT: vpmovq2m %xmm0, %k0
4082 ; SKX-NEXT: kmovd %k0, %eax
4083 ; SKX-NEXT: andl $3, %eax
4085 %a = bitcast <2 x double> %x to <2 x i64>
4086 %b = icmp slt <2 x i64> %a, zeroinitializer
4087 %c = bitcast <2 x i1> %b to i2
4088 %d = zext i2 %c to i32
; IR idiom for MOVMSKPS: sign-bit test of the <4 x i32> bitcast of a
; <4 x float>, mask bitcast to i4 and zext to i32.
4092 define i32 @movmskps(<4 x float> %x) {
4093 ; SSE2-LABEL: movmskps:
4095 ; SSE2-NEXT: movmskps %xmm0, %eax
4098 ; AVX-LABEL: movmskps:
4100 ; AVX-NEXT: vmovmskps %xmm0, %eax
4103 ; KNL-LABEL: movmskps:
4105 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4106 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
4107 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
4108 ; KNL-NEXT: kmovw %k0, %eax
4109 ; KNL-NEXT: andl $15, %eax
4110 ; KNL-NEXT: vzeroupper
4113 ; SKX-LABEL: movmskps:
4115 ; SKX-NEXT: vpmovd2m %xmm0, %k0
4116 ; SKX-NEXT: kmovd %k0, %eax
4117 ; SKX-NEXT: andl $15, %eax
4119 %a = bitcast <4 x float> %x to <4 x i32>
4120 %b = icmp slt <4 x i32> %a, zeroinitializer
4121 %c = bitcast <4 x i1> %b to i4
4122 %d = zext i4 %c to i32
; IR idiom for 256-bit MOVMSKPD: sign-bit test of <4 x i64>, mask zext
; to i32.  SSE2 narrows the two halves with packssdw + movmskps; AVX
; selects a single ymm vmovmskpd.
4126 define i32 @movmskpd256(<4 x double> %x) {
4127 ; SSE2-LABEL: movmskpd256:
4129 ; SSE2-NEXT: packssdw %xmm1, %xmm0
4130 ; SSE2-NEXT: movmskps %xmm0, %eax
4133 ; AVX-LABEL: movmskpd256:
4135 ; AVX-NEXT: vmovmskpd %ymm0, %eax
4136 ; AVX-NEXT: vzeroupper
4139 ; KNL-LABEL: movmskpd256:
4141 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4142 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
4143 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
4144 ; KNL-NEXT: kmovw %k0, %eax
4145 ; KNL-NEXT: andl $15, %eax
4146 ; KNL-NEXT: vzeroupper
4149 ; SKX-LABEL: movmskpd256:
4151 ; SKX-NEXT: vpmovq2m %ymm0, %k0
4152 ; SKX-NEXT: kmovd %k0, %eax
4153 ; SKX-NEXT: andl $15, %eax
4154 ; SKX-NEXT: vzeroupper
4156 %a = bitcast <4 x double> %x to <4 x i64>
4157 %b = icmp slt <4 x i64> %a, zeroinitializer
4158 %c = bitcast <4 x i1> %b to i4
4159 %d = zext i4 %c to i32
; IR idiom for 256-bit MOVMSKPS: sign-bit test of <8 x i32>, mask zext
; to i32.  SSE2 double-packs to bytes then pmovmskb+movzbl; AVX uses a
; single ymm vmovmskps; SKX reads the mask register with kmovb.
4163 define i32 @movmskps256(<8 x float> %x) {
4164 ; SSE2-LABEL: movmskps256:
4166 ; SSE2-NEXT: packssdw %xmm1, %xmm0
4167 ; SSE2-NEXT: packsswb %xmm0, %xmm0
4168 ; SSE2-NEXT: pmovmskb %xmm0, %eax
4169 ; SSE2-NEXT: movzbl %al, %eax
4172 ; AVX-LABEL: movmskps256:
4174 ; AVX-NEXT: vmovmskps %ymm0, %eax
4175 ; AVX-NEXT: vzeroupper
4178 ; KNL-LABEL: movmskps256:
4180 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4181 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
4182 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
4183 ; KNL-NEXT: kmovw %k0, %eax
4184 ; KNL-NEXT: movzbl %al, %eax
4185 ; KNL-NEXT: vzeroupper
4188 ; SKX-LABEL: movmskps256:
4190 ; SKX-NEXT: vpmovd2m %ymm0, %k0
4191 ; SKX-NEXT: kmovb %k0, %eax
4192 ; SKX-NEXT: vzeroupper
4194 %a = bitcast <8 x float> %x to <8 x i32>
4195 %b = icmp slt <8 x i32> %a, zeroinitializer
4196 %c = bitcast <8 x i1> %b to i8
4197 %d = zext i8 %c to i32
; IR idiom for PMOVMSKB: sign-bit test of <16 x i8>, <16 x i1> mask
; bitcast to i16 and zext to i32.  All non-SKX targets select pmovmskb.
4201 define i32 @movmskb(<16 x i8> %x) {
4202 ; SSE2-LABEL: movmskb:
4204 ; SSE2-NEXT: pmovmskb %xmm0, %eax
4207 ; AVX-LABEL: movmskb:
4209 ; AVX-NEXT: vpmovmskb %xmm0, %eax
4212 ; KNL-LABEL: movmskb:
4214 ; KNL-NEXT: vpmovmskb %xmm0, %eax
4217 ; SKX-LABEL: movmskb:
4219 ; SKX-NEXT: vpmovb2m %xmm0, %k0
4220 ; SKX-NEXT: kmovw %k0, %eax
4222 %a = icmp slt <16 x i8> %x, zeroinitializer
4223 %b = bitcast <16 x i1> %a to i16
4224 %c = zext i16 %b to i32
; IR idiom for 256-bit PMOVMSKB: sign-bit test of <32 x i8> bitcast to a
; full i32 mask.  SSE2/AVX1 combine two 16-bit masks with shll/orl;
; AVX2+ use a single ymm vpmovmskb.
4228 define i32 @movmskb256(<32 x i8> %x) {
4229 ; SSE2-LABEL: movmskb256:
4231 ; SSE2-NEXT: pmovmskb %xmm0, %ecx
4232 ; SSE2-NEXT: pmovmskb %xmm1, %eax
4233 ; SSE2-NEXT: shll $16, %eax
4234 ; SSE2-NEXT: orl %ecx, %eax
4237 ; AVX1-LABEL: movmskb256:
4239 ; AVX1-NEXT: vpmovmskb %xmm0, %ecx
4240 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
4241 ; AVX1-NEXT: vpmovmskb %xmm0, %eax
4242 ; AVX1-NEXT: shll $16, %eax
4243 ; AVX1-NEXT: orl %ecx, %eax
4244 ; AVX1-NEXT: vzeroupper
4247 ; AVX2-LABEL: movmskb256:
4249 ; AVX2-NEXT: vpmovmskb %ymm0, %eax
4250 ; AVX2-NEXT: vzeroupper
4253 ; KNL-LABEL: movmskb256:
4255 ; KNL-NEXT: vpmovmskb %ymm0, %eax
4256 ; KNL-NEXT: vzeroupper
4259 ; SKX-LABEL: movmskb256:
4261 ; SKX-NEXT: vpmovb2m %ymm0, %k0
4262 ; SKX-NEXT: kmovd %k0, %eax
4263 ; SKX-NEXT: vzeroupper
4265 %a = icmp slt <32 x i8> %x, zeroinitializer
4266 %b = bitcast <32 x i1> %a to i32
4270 ; Multiple extract elements from a vector compare.
; Extracts three i1 lanes (3, 8, 15) from one <16 x i8> equality compare
; and combines them with xor/and — checks how individual mask bits are
; pulled out of a movmsk/kreg result rather than the whole mask.
4272 define i1 @movmsk_v16i8(<16 x i8> %x, <16 x i8> %y) {
4273 ; SSE2-LABEL: movmsk_v16i8:
4275 ; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
4276 ; SSE2-NEXT: pmovmskb %xmm0, %eax
4277 ; SSE2-NEXT: movl %eax, %ecx
4278 ; SSE2-NEXT: shrl $15, %ecx
4279 ; SSE2-NEXT: movl %eax, %edx
4280 ; SSE2-NEXT: shrl $8, %edx
4281 ; SSE2-NEXT: andl $1, %edx
4282 ; SSE2-NEXT: andl $8, %eax
4283 ; SSE2-NEXT: shrl $3, %eax
4284 ; SSE2-NEXT: xorl %edx, %eax
4285 ; SSE2-NEXT: andl %ecx, %eax
4286 ; SSE2-NEXT: # kill: def $al killed $al killed $eax
4289 ; AVX-LABEL: movmsk_v16i8:
4291 ; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
4292 ; AVX-NEXT: vpmovmskb %xmm0, %eax
4293 ; AVX-NEXT: movl %eax, %ecx
4294 ; AVX-NEXT: shrl $15, %ecx
4295 ; AVX-NEXT: movl %eax, %edx
4296 ; AVX-NEXT: shrl $8, %edx
4297 ; AVX-NEXT: andl $1, %edx
4298 ; AVX-NEXT: andl $8, %eax
4299 ; AVX-NEXT: shrl $3, %eax
4300 ; AVX-NEXT: xorl %edx, %eax
4301 ; AVX-NEXT: andl %ecx, %eax
4302 ; AVX-NEXT: # kill: def $al killed $al killed $eax
4305 ; KNL-LABEL: movmsk_v16i8:
4307 ; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
4308 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
4309 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
4310 ; KNL-NEXT: kshiftrw $15, %k0, %k1
4311 ; KNL-NEXT: kmovw %k1, %ecx
4312 ; KNL-NEXT: kshiftrw $8, %k0, %k1
4313 ; KNL-NEXT: kmovw %k1, %edx
4314 ; KNL-NEXT: kshiftrw $3, %k0, %k0
4315 ; KNL-NEXT: kmovw %k0, %eax
4316 ; KNL-NEXT: xorb %dl, %al
4317 ; KNL-NEXT: andb %cl, %al
4318 ; KNL-NEXT: # kill: def $al killed $al killed $eax
4319 ; KNL-NEXT: vzeroupper
4322 ; SKX-LABEL: movmsk_v16i8:
4324 ; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
4325 ; SKX-NEXT: kshiftrw $15, %k0, %k1
4326 ; SKX-NEXT: kmovd %k1, %ecx
4327 ; SKX-NEXT: kshiftrw $8, %k0, %k1
4328 ; SKX-NEXT: kmovd %k1, %edx
4329 ; SKX-NEXT: kshiftrw $3, %k0, %k0
4330 ; SKX-NEXT: kmovd %k0, %eax
4331 ; SKX-NEXT: xorb %dl, %al
4332 ; SKX-NEXT: andb %cl, %al
4333 ; SKX-NEXT: # kill: def $al killed $al killed $eax
4335 %cmp = icmp eq <16 x i8> %x, %y
4336 %e1 = extractelement <16 x i1> %cmp, i32 3
4337 %e2 = extractelement <16 x i1> %cmp, i32 8
4338 %e3 = extractelement <16 x i1> %cmp, i32 15
4339 %u1 = xor i1 %e1, %e2
4340 %u2 = and i1 %e3, %u1
4344 ; TODO: Replace shift+mask chain with NOT+TEST+SETE
; Extracts four i1 lanes (0, 1, 4, 7) from a <8 x i16> signed-greater
; compare and ANDs them together; current codegen uses a per-bit
; shift+mask chain (see TODO above about folding to NOT+TEST+SETE).
4345 define i1 @movmsk_v8i16(<8 x i16> %x, <8 x i16> %y) {
4346 ; SSE2-LABEL: movmsk_v8i16:
4348 ; SSE2-NEXT: pcmpgtw %xmm1, %xmm0
4349 ; SSE2-NEXT: packsswb %xmm0, %xmm0
4350 ; SSE2-NEXT: pmovmskb %xmm0, %ecx
4351 ; SSE2-NEXT: movl %ecx, %eax
4352 ; SSE2-NEXT: shrb $7, %al
4353 ; SSE2-NEXT: movl %ecx, %edx
4354 ; SSE2-NEXT: andb $16, %dl
4355 ; SSE2-NEXT: shrb $4, %dl
4356 ; SSE2-NEXT: andb %al, %dl
4357 ; SSE2-NEXT: movl %ecx, %eax
4358 ; SSE2-NEXT: shrb %al
4359 ; SSE2-NEXT: andb %dl, %al
4360 ; SSE2-NEXT: andb %cl, %al
4363 ; AVX-LABEL: movmsk_v8i16:
4365 ; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
4366 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
4367 ; AVX-NEXT: vpmovmskb %xmm0, %ecx
4368 ; AVX-NEXT: movl %ecx, %eax
4369 ; AVX-NEXT: shrb $7, %al
4370 ; AVX-NEXT: movl %ecx, %edx
4371 ; AVX-NEXT: andb $16, %dl
4372 ; AVX-NEXT: shrb $4, %dl
4373 ; AVX-NEXT: andb %al, %dl
4374 ; AVX-NEXT: movl %ecx, %eax
4375 ; AVX-NEXT: shrb %al
4376 ; AVX-NEXT: andb %dl, %al
4377 ; AVX-NEXT: andb %cl, %al
4380 ; KNL-LABEL: movmsk_v8i16:
4382 ; KNL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
4383 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
4384 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
4385 ; KNL-NEXT: kshiftrw $4, %k0, %k1
4386 ; KNL-NEXT: kmovw %k1, %ecx
4387 ; KNL-NEXT: kshiftrw $7, %k0, %k1
4388 ; KNL-NEXT: kmovw %k1, %eax
4389 ; KNL-NEXT: kshiftrw $1, %k0, %k1
4390 ; KNL-NEXT: kmovw %k1, %edx
4391 ; KNL-NEXT: kmovw %k0, %esi
4392 ; KNL-NEXT: andb %cl, %al
4393 ; KNL-NEXT: andb %dl, %al
4394 ; KNL-NEXT: andb %sil, %al
4395 ; KNL-NEXT: # kill: def $al killed $al killed $eax
4396 ; KNL-NEXT: vzeroupper
4399 ; SKX-LABEL: movmsk_v8i16:
4401 ; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
4402 ; SKX-NEXT: kshiftrb $4, %k0, %k1
4403 ; SKX-NEXT: kmovd %k1, %ecx
4404 ; SKX-NEXT: kshiftrb $7, %k0, %k1
4405 ; SKX-NEXT: kmovd %k1, %eax
4406 ; SKX-NEXT: kshiftrb $1, %k0, %k1
4407 ; SKX-NEXT: kmovd %k1, %edx
4408 ; SKX-NEXT: kmovd %k0, %esi
4409 ; SKX-NEXT: andb %cl, %al
4410 ; SKX-NEXT: andb %dl, %al
4411 ; SKX-NEXT: andb %sil, %al
4412 ; SKX-NEXT: # kill: def $al killed $al killed $eax
4414 %cmp = icmp sgt <8 x i16> %x, %y
4415 %e1 = extractelement <8 x i1> %cmp, i32 0
4416 %e2 = extractelement <8 x i1> %cmp, i32 1
4417 %e3 = extractelement <8 x i1> %cmp, i32 7
4418 %e4 = extractelement <8 x i1> %cmp, i32 4
4419 %u1 = and i1 %e1, %e2
4420 %u2 = and i1 %e3, %e4
4421 %u3 = and i1 %u1, %u2
4425 ; TODO: Replace shift+mask chain with AND+CMP.
; Extracts lanes 2 and 3 of a <4 x i32> signed-less-than compare and
; XORs them; current codegen extracts each bit with a shift+mask chain
; (see TODO above about folding to AND+CMP).
4426 define i1 @movmsk_v4i32(<4 x i32> %x, <4 x i32> %y) {
4427 ; SSE2-LABEL: movmsk_v4i32:
4429 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
4430 ; SSE2-NEXT: movmskps %xmm1, %eax
4431 ; SSE2-NEXT: movl %eax, %ecx
4432 ; SSE2-NEXT: shrb $3, %cl
4433 ; SSE2-NEXT: andb $4, %al
4434 ; SSE2-NEXT: shrb $2, %al
4435 ; SSE2-NEXT: xorb %cl, %al
4436 ; SSE2-NEXT: # kill: def $al killed $al killed $eax
4439 ; AVX-LABEL: movmsk_v4i32:
4441 ; AVX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
4442 ; AVX-NEXT: vmovmskps %xmm0, %eax
4443 ; AVX-NEXT: movl %eax, %ecx
4444 ; AVX-NEXT: shrb $3, %cl
4445 ; AVX-NEXT: andb $4, %al
4446 ; AVX-NEXT: shrb $2, %al
4447 ; AVX-NEXT: xorb %cl, %al
4448 ; AVX-NEXT: # kill: def $al killed $al killed $eax
4451 ; KNL-LABEL: movmsk_v4i32:
4453 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4454 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4455 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
4456 ; KNL-NEXT: kshiftrw $3, %k0, %k1
4457 ; KNL-NEXT: kmovw %k1, %ecx
4458 ; KNL-NEXT: kshiftrw $2, %k0, %k0
4459 ; KNL-NEXT: kmovw %k0, %eax
4460 ; KNL-NEXT: xorb %cl, %al
4461 ; KNL-NEXT: # kill: def $al killed $al killed $eax
4462 ; KNL-NEXT: vzeroupper
4465 ; SKX-LABEL: movmsk_v4i32:
4467 ; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
4468 ; SKX-NEXT: kshiftrb $3, %k0, %k1
4469 ; SKX-NEXT: kmovd %k1, %ecx
4470 ; SKX-NEXT: kshiftrb $2, %k0, %k0
4471 ; SKX-NEXT: kmovd %k0, %eax
4472 ; SKX-NEXT: xorb %cl, %al
4473 ; SKX-NEXT: # kill: def $al killed $al killed $eax
4475 %cmp = icmp slt <4 x i32> %x, %y
4476 %e1 = extractelement <4 x i1> %cmp, i32 2
4477 %e2 = extractelement <4 x i1> %cmp, i32 3
4478 %u1 = xor i1 %e1, %e2
; ANDs both lanes of a <2 x i64> inequality compare.  SSE2 has no
; pcmpneqq, so it builds eq via pcmpeqd+pshufd+pand and inverts with
; pxor against all-ones before movmskpd.
4482 define i1 @movmsk_v2i64(<2 x i64> %x, <2 x i64> %y) {
4483 ; SSE2-LABEL: movmsk_v2i64:
4485 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
4486 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
4487 ; SSE2-NEXT: pand %xmm0, %xmm1
4488 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
4489 ; SSE2-NEXT: pxor %xmm1, %xmm0
4490 ; SSE2-NEXT: movmskpd %xmm0, %ecx
4491 ; SSE2-NEXT: movl %ecx, %eax
4492 ; SSE2-NEXT: shrb %al
4493 ; SSE2-NEXT: andb %cl, %al
4496 ; AVX-LABEL: movmsk_v2i64:
4498 ; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
4499 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
4500 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
4501 ; AVX-NEXT: vmovmskpd %xmm0, %ecx
4502 ; AVX-NEXT: movl %ecx, %eax
4503 ; AVX-NEXT: shrb %al
4504 ; AVX-NEXT: andb %cl, %al
4507 ; KNL-LABEL: movmsk_v2i64:
4509 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4510 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4511 ; KNL-NEXT: vpcmpneqq %zmm1, %zmm0, %k0
4512 ; KNL-NEXT: kshiftrw $1, %k0, %k1
4513 ; KNL-NEXT: kmovw %k1, %ecx
4514 ; KNL-NEXT: kmovw %k0, %eax
4515 ; KNL-NEXT: andb %cl, %al
4516 ; KNL-NEXT: # kill: def $al killed $al killed $eax
4517 ; KNL-NEXT: vzeroupper
4520 ; SKX-LABEL: movmsk_v2i64:
4522 ; SKX-NEXT: vpcmpneqq %xmm1, %xmm0, %k0
4523 ; SKX-NEXT: kshiftrb $1, %k0, %k1
4524 ; SKX-NEXT: kmovd %k1, %ecx
4525 ; SKX-NEXT: kmovd %k0, %eax
4526 ; SKX-NEXT: andb %cl, %al
4527 ; SKX-NEXT: # kill: def $al killed $al killed $eax
4529 %cmp = icmp ne <2 x i64> %x, %y
4530 %e1 = extractelement <2 x i1> %cmp, i32 0
4531 %e2 = extractelement <2 x i1> %cmp, i32 1
4532 %u1 = and i1 %e1, %e2
; movmsk_v4f32: fcmp ueq ("unordered or equal") on <4 x float>, then OR
; lanes 1, 2 and 3 of the <4 x i1> result (lane 0 is unused).
; NOTE(review): CHECK lines below are autogenerated (update_llc_test_checks.py)
; — do not hand-edit them; regenerate instead.
; Lowering summary, as asserted by the checks:
;  * SSE2 has no single eq_uq predicate, so it combines cmpeqps with
;    cmpunordps via orps; the three interesting movmskps bits are then tested
;    at once with `testb $14` (0b1110 = lanes 1-3) + setne.
;  * AVX encodes the predicate directly as vcmpeq_uqps and uses the same
;    testb $14 trick.
;  * KNL/SKX compare into a mask register and, instead of a single test,
;    extract bits 3, 2 and 1 individually (kshift + kmov) and OR the bytes.
;    KNL widens to zmm (hence vzeroupper); SKX stays at xmm width.
4536 define i1 @movmsk_v4f32(<4 x float> %x, <4 x float> %y) {
4537 ; SSE2-LABEL: movmsk_v4f32:
4539 ; SSE2-NEXT: movaps %xmm0, %xmm2
4540 ; SSE2-NEXT: cmpeqps %xmm1, %xmm2
4541 ; SSE2-NEXT: cmpunordps %xmm1, %xmm0
4542 ; SSE2-NEXT: orps %xmm2, %xmm0
4543 ; SSE2-NEXT: movmskps %xmm0, %eax
4544 ; SSE2-NEXT: testb $14, %al
4545 ; SSE2-NEXT: setne %al
4548 ; AVX-LABEL: movmsk_v4f32:
4550 ; AVX-NEXT: vcmpeq_uqps %xmm1, %xmm0, %xmm0
4551 ; AVX-NEXT: vmovmskps %xmm0, %eax
4552 ; AVX-NEXT: testb $14, %al
4553 ; AVX-NEXT: setne %al
4556 ; KNL-LABEL: movmsk_v4f32:
4558 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4559 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4560 ; KNL-NEXT: vcmpeq_uqps %zmm1, %zmm0, %k0
4561 ; KNL-NEXT: kshiftrw $3, %k0, %k1
4562 ; KNL-NEXT: kmovw %k1, %ecx
4563 ; KNL-NEXT: kshiftrw $2, %k0, %k1
4564 ; KNL-NEXT: kmovw %k1, %eax
4565 ; KNL-NEXT: kshiftrw $1, %k0, %k0
4566 ; KNL-NEXT: kmovw %k0, %edx
4567 ; KNL-NEXT: orb %cl, %al
4568 ; KNL-NEXT: orb %dl, %al
4569 ; KNL-NEXT: # kill: def $al killed $al killed $eax
4570 ; KNL-NEXT: vzeroupper
4573 ; SKX-LABEL: movmsk_v4f32:
4575 ; SKX-NEXT: vcmpeq_uqps %xmm1, %xmm0, %k0
4576 ; SKX-NEXT: kshiftrb $3, %k0, %k1
4577 ; SKX-NEXT: kmovd %k1, %ecx
4578 ; SKX-NEXT: kshiftrb $2, %k0, %k1
4579 ; SKX-NEXT: kmovd %k1, %eax
4580 ; SKX-NEXT: kshiftrb $1, %k0, %k0
4581 ; SKX-NEXT: kmovd %k0, %edx
4582 ; SKX-NEXT: orb %cl, %al
4583 ; SKX-NEXT: orb %dl, %al
4584 ; SKX-NEXT: # kill: def $al killed $al killed $eax
; IR under test: ueq-compare, extract lanes 1-3, OR them together.
4586 %cmp = fcmp ueq <4 x float> %x, %y
4587 %e1 = extractelement <4 x i1> %cmp, i32 1
4588 %e2 = extractelement <4 x i1> %cmp, i32 2
4589 %e3 = extractelement <4 x i1> %cmp, i32 3
4590 %u1 = or i1 %e1, %e2
4591 %u2 = or i1 %u1, %e3
; movmsk_v2f64: fcmp oge on <2 x double>, then AND both lanes of the
; <2 x i1> result into a single i1.
; NOTE(review): CHECK lines below are autogenerated (update_llc_test_checks.py)
; — do not hand-edit them; regenerate instead.
; Lowering summary, as asserted by the checks:
;  * "x >= y (ordered)" is commuted to "y <= x", so SSE2/AVX emit cmplepd /
;    vcmplepd with swapped operands; movmskpd then yields the two sign bits,
;    combined with shrb/andb.
;  * KNL widens to zmm (no VLX), uses vcmplepd into a mask register with the
;    same commuted operands, and extracts the bits via kshiftrw/kmovw;
;    vzeroupper because zmm state was touched.
;  * SKX does the same at xmm width with kshiftrb/kmovd.
4595 define i1 @movmsk_v2f64(<2 x double> %x, <2 x double> %y) {
4596 ; SSE2-LABEL: movmsk_v2f64:
4598 ; SSE2-NEXT: cmplepd %xmm0, %xmm1
4599 ; SSE2-NEXT: movmskpd %xmm1, %ecx
4600 ; SSE2-NEXT: movl %ecx, %eax
4601 ; SSE2-NEXT: shrb %al
4602 ; SSE2-NEXT: andb %cl, %al
4605 ; AVX-LABEL: movmsk_v2f64:
4607 ; AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
4608 ; AVX-NEXT: vmovmskpd %xmm0, %ecx
4609 ; AVX-NEXT: movl %ecx, %eax
4610 ; AVX-NEXT: shrb %al
4611 ; AVX-NEXT: andb %cl, %al
4614 ; KNL-LABEL: movmsk_v2f64:
4616 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4617 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4618 ; KNL-NEXT: vcmplepd %zmm0, %zmm1, %k0
4619 ; KNL-NEXT: kshiftrw $1, %k0, %k1
4620 ; KNL-NEXT: kmovw %k1, %ecx
4621 ; KNL-NEXT: kmovw %k0, %eax
4622 ; KNL-NEXT: andb %cl, %al
4623 ; KNL-NEXT: # kill: def $al killed $al killed $eax
4624 ; KNL-NEXT: vzeroupper
4627 ; SKX-LABEL: movmsk_v2f64:
4629 ; SKX-NEXT: vcmplepd %xmm0, %xmm1, %k0
4630 ; SKX-NEXT: kshiftrb $1, %k0, %k1
4631 ; SKX-NEXT: kmovd %k1, %ecx
4632 ; SKX-NEXT: kmovd %k0, %eax
4633 ; SKX-NEXT: andb %cl, %al
4634 ; SKX-NEXT: # kill: def $al killed $al killed $eax
; IR under test: oge-compare, extract both lanes, AND them.
4636 %cmp = fcmp oge <2 x double> %x, %y
4637 %e1 = extractelement <2 x i1> %cmp, i32 0
4638 %e2 = extractelement <2 x i1> %cmp, i32 1
4639 %u1 = and i1 %e1, %e2
4643 define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
4644 ; SSE2-LABEL: PR39665_c_ray:
4646 ; SSE2-NEXT: cmpltpd %xmm0, %xmm1
4647 ; SSE2-NEXT: movmskpd %xmm1, %eax
4648 ; SSE2-NEXT: cmpb $3, %al
4649 ; SSE2-NEXT: movl $42, %ecx
4650 ; SSE2-NEXT: movl $99, %eax
4651 ; SSE2-NEXT: cmovel %ecx, %eax
4654 ; AVX-LABEL: PR39665_c_ray:
4656 ; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
4657 ; AVX-NEXT: vmovmskpd %xmm0, %eax
4658 ; AVX-NEXT: cmpb $3, %al
4659 ; AVX-NEXT: movl $42, %ecx
4660 ; AVX-NEXT: movl $99, %eax
4661 ; AVX-NEXT: cmovel %ecx, %eax
4664 ; KNL-LABEL: PR39665_c_ray:
4666 ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
4667 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4668 ; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k0
4669 ; KNL-NEXT: kshiftrw $1, %k0, %k1
4670 ; KNL-NEXT: kmovw %k1, %eax
4671 ; KNL-NEXT: kmovw %k0, %ecx
4672 ; KNL-NEXT: testb $1, %al
4673 ; KNL-NEXT: movl $42, %eax
4674 ; KNL-NEXT: movl $99, %edx
4675 ; KNL-NEXT: cmovel %edx, %eax
4676 ; KNL-NEXT: testb $1, %cl
4677 ; KNL-NEXT: cmovel %edx, %eax
4678 ; KNL-NEXT: vzeroupper
4681 ; SKX-LABEL: PR39665_c_ray:
4683 ; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0
4684 ; SKX-NEXT: kshiftrb $1, %k0, %k1
4685 ; SKX-NEXT: kmovd %k1, %eax
4686 ; SKX-NEXT: kmovd %k0, %ecx
4687 ; SKX-NEXT: testb $1, %al
4688 ; SKX-NEXT: movl $42, %eax
4689 ; SKX-NEXT: movl $99, %edx
4690 ; SKX-NEXT: cmovel %edx, %eax
4691 ; SKX-NEXT: testb $1, %cl
4692 ; SKX-NEXT: cmovel %edx, %eax
4694 %cmp = fcmp ogt <2 x double> %x, %y
4695 %e1 = extractelement <2 x i1> %cmp, i32 0
4696 %e2 = extractelement <2 x i1> %cmp, i32 1
4697 %u = and i1 %e1, %e2
4698 %r = select i1 %u, i32 42, i32 99