// Returns true if this masked compare can be implemented legally with this
// type.
static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
- if (N->getOpcode() == X86ISD::PCMPEQM ||
- N->getOpcode() == X86ISD::PCMPGTM ||
- N->getOpcode() == X86ISD::CMPM ||
- N->getOpcode() == X86ISD::CMPMU) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode == X86ISD::PCMPEQM || Opcode == X86ISD::PCMPGTM ||
+ Opcode == X86ISD::CMPM || Opcode == X86ISD::TESTM ||
+ Opcode == X86ISD::TESTNM || Opcode == X86ISD::CMPMU) {
// We can get 256-bit 8 element types here without VLX being enabled. When
// this happens we will use 512-bit operations and the mask will not be
// zero extended.
switch (Opcode) {
default:
return false;
+ case X86ISD::TESTM:
+ case X86ISD::TESTNM:
case X86ISD::PCMPEQM:
case X86ISD::PCMPGTM:
case X86ISD::CMPM:
if (Swap)
std::swap(Op0, Op1);
+
+ // Fold CMP(EQ, AND(A,B), ZERO) to TESTNM(A,B) and CMP(NEQ, AND(A,B), ZERO) to TESTM(A,B).
+ if ((!Opc && SSECC == 4) || Opc == X86ISD::PCMPEQM) {
+ SDValue A = peekThroughBitcasts(Op0);
+ if ((A.getOpcode() == ISD::AND || A.getOpcode() == X86ISD::FAND) &&
+ ISD::isBuildVectorAllZeros(Op1.getNode())) {
+ MVT VT0 = Op0.getSimpleValueType();
+ SDValue RHS = DAG.getBitcast(VT0, A.getOperand(0));
+ SDValue LHS = DAG.getBitcast(VT0, A.getOperand(1));
+ return DAG.getNode(Opc == X86ISD::PCMPEQM ? X86ISD::TESTNM : X86ISD::TESTM,
+ dl, VT, RHS, LHS);
+ }
+ }
+
if (Opc)
return DAG.getNode(Opc, dl, VT, Op0, Op1);
Opc = Unsigned ? X86ISD::CMPMU: X86ISD::CMPM;
; CHECK: # BB#0:
; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0
-; CHECK-NEXT: kshiftlb $4, %k0, %k0
-; CHECK-NEXT: kshiftrb $4, %k0, %k0
; CHECK-NEXT: vpmovm2w %k0, %xmm0
; CHECK-NEXT: retq
define zeroext i32 @TEST_mm512_test_epi16_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_test_epi16_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpcmpneqw %zmm1, %zmm0, %k0
+; CHECK-NEXT: vptestmw %zmm0, %zmm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
define zeroext i64 @TEST_mm512_test_epi8_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_test_epi8_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpcmpneqb %zmm1, %zmm0, %k0
+; CHECK-NEXT: vptestmb %zmm0, %zmm1, %k0
; CHECK-NEXT: kmovq %k0, %rax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
define zeroext i32 @TEST_mm512_mask_test_epi16_mask(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_mask_test_epi16_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: vptestmw %zmm0, %zmm1, %k0 {%k1}
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
define zeroext i64 @TEST_mm512_mask_test_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_mask_test_epi8_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: kmovq %rdi, %k1
-; CHECK-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: vptestmb %zmm0, %zmm1, %k0 {%k1}
; CHECK-NEXT: kmovq %k0, %rax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
define zeroext i32 @TEST_mm512_testn_epi16_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_testn_epi16_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
+; CHECK-NEXT: vptestnmw %zmm0, %zmm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
define zeroext i64 @TEST_mm512_testn_epi8_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_testn_epi8_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
+; CHECK-NEXT: vptestnmb %zmm0, %zmm1, %k0
; CHECK-NEXT: kmovq %k0, %rax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
define zeroext i32 @TEST_mm512_mask_testn_epi16_mask(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_mask_testn_epi16_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: vptestnmw %zmm0, %zmm1, %k0 {%k1}
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
define zeroext i64 @TEST_mm512_mask_testn_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_mask_testn_epi8_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: kmovq %rdi, %k1
-; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: vptestnmb %zmm0, %zmm1, %k0 {%k1}
; CHECK-NEXT: kmovq %k0, %rax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
define zeroext i16 @TEST_mm_test_epi8_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm_test_epi8_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k0
+; CHECK-NEXT: vptestmb %xmm0, %xmm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
define zeroext i16 @TEST_mm_mask_test_epi8_mask(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm_mask_test_epi8_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: vptestmb %xmm0, %xmm1, %k0 {%k1}
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
define zeroext i8 @TEST_mm_test_epi16_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm_test_epi16_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k0
+; CHECK-NEXT: vptestmw %xmm0, %xmm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
define zeroext i8 @TEST_mm_mask_test_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm_mask_test_epi16_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: vptestmw %xmm0, %xmm1, %k0 {%k1}
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
define zeroext i16 @TEST_mm_testn_epi8_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm_testn_epi8_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
+; CHECK-NEXT: vptestnmb %xmm0, %xmm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
define zeroext i16 @TEST_mm_mask_testn_epi8_mask(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm_mask_testn_epi8_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: vptestnmb %xmm0, %xmm1, %k0 {%k1}
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
define zeroext i8 @TEST_mm_testn_epi16_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm_testn_epi16_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
+; CHECK-NEXT: vptestnmw %xmm0, %xmm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
define zeroext i8 @TEST_mm_mask_testn_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm_mask_testn_epi16_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: vptestnmw %xmm0, %xmm1, %k0 {%k1}
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
define i32 @TEST_mm256_test_epi8_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm256_test_epi8_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0
+; CHECK-NEXT: vptestmb %ymm0, %ymm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
define i32 @TEST_mm256_mask_test_epi8_mask(i32 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm256_mask_test_epi8_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: vptestmb %ymm0, %ymm1, %k0 {%k1}
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
define zeroext i16 @TEST_mm256_test_epi16_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm256_test_epi16_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k0
+; CHECK-NEXT: vptestmw %ymm0, %ymm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: vzeroupper
define zeroext i16 @TEST_mm256_mask_test_epi16_mask(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm256_mask_test_epi16_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: vptestmw %ymm0, %ymm1, %k0 {%k1}
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: vzeroupper
define i32 @TEST_mm256_testn_epi8_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm256_testn_epi8_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
+; CHECK-NEXT: vptestnmb %ymm0, %ymm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
define i32 @TEST_mm256_mask_testn_epi8_mask(i32 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm256_mask_testn_epi8_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: vptestnmb %ymm0, %ymm1, %k0 {%k1}
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
define zeroext i16 @TEST_mm256_testn_epi16_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm256_testn_epi16_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
+; CHECK-NEXT: vptestnmw %ymm0, %ymm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: vzeroupper
define zeroext i16 @TEST_mm256_mask_testn_epi16_mask(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm256_mask_testn_epi16_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: vptestnmw %ymm0, %ymm1, %k0 {%k1}
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: vzeroupper
define zeroext i8 @TEST_mm512_test_epi64_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_test_epi64_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k0
+; CHECK-NEXT: vptestmq %zmm0, %zmm1, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: vzeroupper
define zeroext i16 @TEST_mm512_test_epi32_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_test_epi32_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
+; CHECK-NEXT: vptestmd %zmm0, %zmm1, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: vzeroupper
define zeroext i8 @TEST_mm512_mask_test_epi64_mask(i8 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_mask_test_epi64_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: vptestmq %zmm0, %zmm1, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: vzeroupper
define zeroext i16 @TEST_mm512_mask_test_epi32_mask(i16 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_mask_test_epi32_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: vptestmd %zmm0, %zmm1, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: vzeroupper
define zeroext i8 @TEST_mm512_testn_epi64_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_testn_epi64_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
+; CHECK-NEXT: vptestnmq %zmm0, %zmm1, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: vzeroupper
define zeroext i16 @TEST_mm512_testn_epi32_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_testn_epi32_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; CHECK-NEXT: vptestnmd %zmm0, %zmm1, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: vzeroupper
define zeroext i8 @TEST_mm512_mask_testn_epi64_mask(i8 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_mask_testn_epi64_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: vptestnmq %zmm0, %zmm1, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: vzeroupper
define zeroext i16 @TEST_mm512_mask_testn_epi32_mask(i16 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_mask_testn_epi32_mask:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: vptestnmd %zmm0, %zmm1, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: vzeroupper
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
define zeroext i8 @TEST_mm_test_epi64_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
; X86_64-LABEL: TEST_mm_test_epi64_mask:
; X86_64: # BB#0: # %entry
-; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0
-; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X86_64-NEXT: vpcmpneqq %xmm1, %xmm0, %k0
+; X86_64-NEXT: vptestmq %xmm0, %xmm1, %k0
; X86_64-NEXT: kmovw %k0, %eax
; X86_64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X86_64-NEXT: retq
;
; I386-LABEL: TEST_mm_test_epi64_mask:
; I386: # BB#0: # %entry
-; I386-NEXT: vpand %xmm0, %xmm1, %xmm0
-; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; I386-NEXT: vpcmpneqq %xmm1, %xmm0, %k0
+; I386-NEXT: vptestmq %xmm0, %xmm1, %k0
; I386-NEXT: kmovw %k0, %eax
; I386-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; I386-NEXT: retl
define zeroext i8 @TEST_mm_test_epi32_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
; X86_64-LABEL: TEST_mm_test_epi32_mask:
; X86_64: # BB#0: # %entry
-; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0
-; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X86_64-NEXT: vpcmpneqd %xmm1, %xmm0, %k0
+; X86_64-NEXT: vptestmd %xmm0, %xmm1, %k0
; X86_64-NEXT: kmovw %k0, %eax
; X86_64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X86_64-NEXT: retq
;
; I386-LABEL: TEST_mm_test_epi32_mask:
; I386: # BB#0: # %entry
-; I386-NEXT: vpand %xmm0, %xmm1, %xmm0
-; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; I386-NEXT: vpcmpneqd %xmm1, %xmm0, %k0
+; I386-NEXT: vptestmd %xmm0, %xmm1, %k0
; I386-NEXT: kmovw %k0, %eax
; I386-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; I386-NEXT: retl
define zeroext i8 @TEST_mm256_test_epi64_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
; X86_64-LABEL: TEST_mm256_test_epi64_mask:
; X86_64: # BB#0: # %entry
-; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0
-; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X86_64-NEXT: vpcmpneqq %ymm1, %ymm0, %k0
+; X86_64-NEXT: vptestmq %ymm0, %ymm1, %k0
; X86_64-NEXT: kmovw %k0, %eax
; X86_64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X86_64-NEXT: vzeroupper
;
; I386-LABEL: TEST_mm256_test_epi64_mask:
; I386: # BB#0: # %entry
-; I386-NEXT: vpand %ymm0, %ymm1, %ymm0
-; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; I386-NEXT: vpcmpneqq %ymm1, %ymm0, %k0
+; I386-NEXT: vptestmq %ymm0, %ymm1, %k0
; I386-NEXT: kmovw %k0, %eax
; I386-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; I386-NEXT: vzeroupper
define zeroext i8 @TEST_mm256_test_epi32_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
; X86_64-LABEL: TEST_mm256_test_epi32_mask:
; X86_64: # BB#0: # %entry
-; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0
-; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X86_64-NEXT: vpcmpneqd %ymm1, %ymm0, %k0
+; X86_64-NEXT: vptestmd %ymm0, %ymm1, %k0
; X86_64-NEXT: kmovw %k0, %eax
; X86_64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X86_64-NEXT: vzeroupper
;
; I386-LABEL: TEST_mm256_test_epi32_mask:
; I386: # BB#0: # %entry
-; I386-NEXT: vpand %ymm0, %ymm1, %ymm0
-; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; I386-NEXT: vpcmpneqd %ymm1, %ymm0, %k0
+; I386-NEXT: vptestmd %ymm0, %ymm1, %k0
; I386-NEXT: kmovw %k0, %eax
; I386-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; I386-NEXT: vzeroupper
define zeroext i8 @TEST_mm_mask_test_epi64_mask(i8 %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
; X86_64-LABEL: TEST_mm_mask_test_epi64_mask:
; X86_64: # BB#0: # %entry
-; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0
-; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86_64-NEXT: kmovw %edi, %k1
-; X86_64-NEXT: vpcmpneqq %xmm1, %xmm0, %k0 {%k1}
+; X86_64-NEXT: vptestmq %xmm0, %xmm1, %k0 {%k1}
; X86_64-NEXT: kmovw %k0, %eax
; X86_64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X86_64-NEXT: retq
;
; I386-LABEL: TEST_mm_mask_test_epi64_mask:
; I386: # BB#0: # %entry
-; I386-NEXT: vpand %xmm0, %xmm1, %xmm0
-; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; I386-NEXT: kmovw %eax, %k1
-; I386-NEXT: vpcmpneqq %xmm1, %xmm0, %k0 {%k1}
+; I386-NEXT: vptestmq %xmm0, %xmm1, %k0 {%k1}
; I386-NEXT: kmovw %k0, %eax
; I386-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; I386-NEXT: retl
define zeroext i8 @TEST_mm_mask_test_epi32_mask(i8 %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
; X86_64-LABEL: TEST_mm_mask_test_epi32_mask:
; X86_64: # BB#0: # %entry
-; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0
-; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86_64-NEXT: kmovw %edi, %k1
-; X86_64-NEXT: vpcmpneqd %xmm1, %xmm0, %k0 {%k1}
+; X86_64-NEXT: vptestmd %xmm0, %xmm1, %k0 {%k1}
; X86_64-NEXT: kmovw %k0, %eax
; X86_64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X86_64-NEXT: retq
;
; I386-LABEL: TEST_mm_mask_test_epi32_mask:
; I386: # BB#0: # %entry
-; I386-NEXT: vpand %xmm0, %xmm1, %xmm0
-; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; I386-NEXT: kmovw %eax, %k1
-; I386-NEXT: vpcmpneqd %xmm1, %xmm0, %k0 {%k1}
+; I386-NEXT: vptestmd %xmm0, %xmm1, %k0 {%k1}
; I386-NEXT: kmovw %k0, %eax
; I386-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; I386-NEXT: retl
define zeroext i8 @TEST_mm256_mask_test_epi64_mask(i8 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
; X86_64-LABEL: TEST_mm256_mask_test_epi64_mask:
; X86_64: # BB#0: # %entry
-; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0
-; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86_64-NEXT: kmovw %edi, %k1
-; X86_64-NEXT: vpcmpneqq %ymm1, %ymm0, %k0 {%k1}
+; X86_64-NEXT: vptestmq %ymm0, %ymm1, %k0 {%k1}
; X86_64-NEXT: kmovw %k0, %eax
; X86_64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X86_64-NEXT: vzeroupper
;
; I386-LABEL: TEST_mm256_mask_test_epi64_mask:
; I386: # BB#0: # %entry
-; I386-NEXT: vpand %ymm0, %ymm1, %ymm0
-; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; I386-NEXT: kmovw %eax, %k1
-; I386-NEXT: vpcmpneqq %ymm1, %ymm0, %k0 {%k1}
+; I386-NEXT: vptestmq %ymm0, %ymm1, %k0 {%k1}
; I386-NEXT: kmovw %k0, %eax
; I386-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; I386-NEXT: vzeroupper
define zeroext i8 @TEST_mm256_mask_test_epi32_mask(i8 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
; X86_64-LABEL: TEST_mm256_mask_test_epi32_mask:
; X86_64: # BB#0: # %entry
-; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0
-; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86_64-NEXT: kmovw %edi, %k1
-; X86_64-NEXT: vpcmpneqd %ymm1, %ymm0, %k0 {%k1}
+; X86_64-NEXT: vptestmd %ymm0, %ymm1, %k0 {%k1}
; X86_64-NEXT: kmovw %k0, %eax
; X86_64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X86_64-NEXT: vzeroupper
;
; I386-LABEL: TEST_mm256_mask_test_epi32_mask:
; I386: # BB#0: # %entry
-; I386-NEXT: vpand %ymm0, %ymm1, %ymm0
-; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; I386-NEXT: kmovw %eax, %k1
-; I386-NEXT: vpcmpneqd %ymm1, %ymm0, %k0 {%k1}
+; I386-NEXT: vptestmd %ymm0, %ymm1, %k0 {%k1}
; I386-NEXT: kmovw %k0, %eax
; I386-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; I386-NEXT: vzeroupper
define zeroext i8 @TEST_mm_testn_epi64_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
; X86_64-LABEL: TEST_mm_testn_epi64_mask:
; X86_64: # BB#0: # %entry
-; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0
-; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X86_64-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
+; X86_64-NEXT: vptestnmq %xmm0, %xmm1, %k0
; X86_64-NEXT: kmovw %k0, %eax
; X86_64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X86_64-NEXT: retq
;
; I386-LABEL: TEST_mm_testn_epi64_mask:
; I386: # BB#0: # %entry
-; I386-NEXT: vpand %xmm0, %xmm1, %xmm0
-; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; I386-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
+; I386-NEXT: vptestnmq %xmm0, %xmm1, %k0
; I386-NEXT: kmovw %k0, %eax
; I386-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; I386-NEXT: retl
define zeroext i8 @TEST_mm_testn_epi32_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
; X86_64-LABEL: TEST_mm_testn_epi32_mask:
; X86_64: # BB#0: # %entry
-; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0
-; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X86_64-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
+; X86_64-NEXT: vptestnmd %xmm0, %xmm1, %k0
; X86_64-NEXT: kmovw %k0, %eax
; X86_64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X86_64-NEXT: retq
;
; I386-LABEL: TEST_mm_testn_epi32_mask:
; I386: # BB#0: # %entry
-; I386-NEXT: vpand %xmm0, %xmm1, %xmm0
-; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; I386-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
+; I386-NEXT: vptestnmd %xmm0, %xmm1, %k0
; I386-NEXT: kmovw %k0, %eax
; I386-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; I386-NEXT: retl
define zeroext i8 @TEST_mm256_testn_epi64_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
; X86_64-LABEL: TEST_mm256_testn_epi64_mask:
; X86_64: # BB#0: # %entry
-; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0
-; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X86_64-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
+; X86_64-NEXT: vptestnmq %ymm0, %ymm1, %k0
; X86_64-NEXT: kmovw %k0, %eax
; X86_64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X86_64-NEXT: vzeroupper
;
; I386-LABEL: TEST_mm256_testn_epi64_mask:
; I386: # BB#0: # %entry
-; I386-NEXT: vpand %ymm0, %ymm1, %ymm0
-; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; I386-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
+; I386-NEXT: vptestnmq %ymm0, %ymm1, %k0
; I386-NEXT: kmovw %k0, %eax
; I386-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; I386-NEXT: vzeroupper
define zeroext i8 @TEST_mm256_testn_epi32_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
; X86_64-LABEL: TEST_mm256_testn_epi32_mask:
; X86_64: # BB#0: # %entry
-; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0
-; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X86_64-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
+; X86_64-NEXT: vptestnmd %ymm0, %ymm1, %k0
; X86_64-NEXT: kmovw %k0, %eax
; X86_64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X86_64-NEXT: vzeroupper
;
; I386-LABEL: TEST_mm256_testn_epi32_mask:
; I386: # BB#0: # %entry
-; I386-NEXT: vpand %ymm0, %ymm1, %ymm0
-; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; I386-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
+; I386-NEXT: vptestnmd %ymm0, %ymm1, %k0
; I386-NEXT: kmovw %k0, %eax
; I386-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; I386-NEXT: vzeroupper
define zeroext i8 @TEST_mm_mask_testn_epi64_mask(i8 %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
; X86_64-LABEL: TEST_mm_mask_testn_epi64_mask:
; X86_64: # BB#0: # %entry
-; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0
-; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86_64-NEXT: kmovw %edi, %k1
-; X86_64-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
+; X86_64-NEXT: vptestnmq %xmm0, %xmm1, %k0 {%k1}
; X86_64-NEXT: kmovw %k0, %eax
; X86_64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X86_64-NEXT: retq
;
; I386-LABEL: TEST_mm_mask_testn_epi64_mask:
; I386: # BB#0: # %entry
-; I386-NEXT: vpand %xmm0, %xmm1, %xmm0
-; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; I386-NEXT: kmovw %eax, %k1
-; I386-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
+; I386-NEXT: vptestnmq %xmm0, %xmm1, %k0 {%k1}
; I386-NEXT: kmovw %k0, %eax
; I386-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; I386-NEXT: retl
define zeroext i8 @TEST_mm_mask_testn_epi32_mask(i8 %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
; X86_64-LABEL: TEST_mm_mask_testn_epi32_mask:
; X86_64: # BB#0: # %entry
-; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0
-; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86_64-NEXT: kmovw %edi, %k1
-; X86_64-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
+; X86_64-NEXT: vptestnmd %xmm0, %xmm1, %k0 {%k1}
; X86_64-NEXT: kmovw %k0, %eax
; X86_64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X86_64-NEXT: retq
;
; I386-LABEL: TEST_mm_mask_testn_epi32_mask:
; I386: # BB#0: # %entry
-; I386-NEXT: vpand %xmm0, %xmm1, %xmm0
-; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; I386-NEXT: kmovw %eax, %k1
-; I386-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
+; I386-NEXT: vptestnmd %xmm0, %xmm1, %k0 {%k1}
; I386-NEXT: kmovw %k0, %eax
; I386-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; I386-NEXT: retl
define zeroext i8 @TEST_mm256_mask_testn_epi64_mask(i8 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
; X86_64-LABEL: TEST_mm256_mask_testn_epi64_mask:
; X86_64: # BB#0: # %entry
-; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0
-; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86_64-NEXT: kmovw %edi, %k1
-; X86_64-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
+; X86_64-NEXT: vptestnmq %ymm0, %ymm1, %k0 {%k1}
; X86_64-NEXT: kmovw %k0, %eax
; X86_64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X86_64-NEXT: vzeroupper
;
; I386-LABEL: TEST_mm256_mask_testn_epi64_mask:
; I386: # BB#0: # %entry
-; I386-NEXT: vpand %ymm0, %ymm1, %ymm0
-; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; I386-NEXT: kmovw %eax, %k1
-; I386-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
+; I386-NEXT: vptestnmq %ymm0, %ymm1, %k0 {%k1}
; I386-NEXT: kmovw %k0, %eax
; I386-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; I386-NEXT: vzeroupper
define zeroext i8 @TEST_mm256_mask_testn_epi32_mask(i8 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
; X86_64-LABEL: TEST_mm256_mask_testn_epi32_mask:
; X86_64: # BB#0: # %entry
-; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0
-; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86_64-NEXT: kmovw %edi, %k1
-; X86_64-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
+; X86_64-NEXT: vptestnmd %ymm0, %ymm1, %k0 {%k1}
; X86_64-NEXT: kmovw %k0, %eax
; X86_64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X86_64-NEXT: vzeroupper
;
; I386-LABEL: TEST_mm256_mask_testn_epi32_mask:
; I386: # BB#0: # %entry
-; I386-NEXT: vpand %ymm0, %ymm1, %ymm0
-; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1
; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; I386-NEXT: kmovw %eax, %k1
-; I386-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
+; I386-NEXT: vptestnmd %ymm0, %ymm1, %k0 {%k1}
; I386-NEXT: kmovw %k0, %eax
; I386-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; I386-NEXT: vzeroupper
; KNL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
-; KNL-NEXT: vptestmq %zmm1, %zmm1, %k0
-; KNL-NEXT: kshiftlw $8, %k0, %k0
-; KNL-NEXT: kshiftrw $8, %k0, %k1
+; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vcompressps %zmm0, (%rdi) {%k1}
; KNL-NEXT: retq
call void @llvm.masked.compressstore.v8f32(<8 x float> %V, float* %base, <8 x i1> %mask)
; SKX: # BB#0:
; SKX-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; SKX-NEXT: vpsllq $63, %xmm2, %xmm2
-; SKX-NEXT: vptestmq %xmm2, %xmm2, %k0
-; SKX-NEXT: kshiftlb $6, %k0, %k0
-; SKX-NEXT: kshiftrb $6, %k0, %k1
+; SKX-NEXT: vptestmq %xmm2, %xmm2, %k1
; SKX-NEXT: vscatterqps %xmm0, (,%ymm1) {%k1}
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX_32: # BB#0:
; SKX_32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2
-; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k0
-; SKX_32-NEXT: kshiftlb $6, %k0, %k0
-; SKX_32-NEXT: kshiftrb $6, %k0, %k1
+; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k1
; SKX_32-NEXT: vscatterdps %xmm0, (,%xmm1) {%k1}
; SKX_32-NEXT: retl
call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> %a1, <2 x float*> %ptr, i32 4, <2 x i1> %mask)
; SKX: # BB#0:
; SKX-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; SKX-NEXT: vpsllq $63, %xmm2, %xmm2
-; SKX-NEXT: vptestmq %xmm2, %xmm2, %k0
-; SKX-NEXT: kshiftlb $6, %k0, %k0
-; SKX-NEXT: kshiftrb $6, %k0, %k1
+; SKX-NEXT: vptestmq %xmm2, %xmm2, %k1
; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1}
; SKX-NEXT: vzeroupper
; SKX_32: # BB#0:
; SKX_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2
-; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k0
-; SKX_32-NEXT: kshiftlb $6, %k0, %k0
-; SKX_32-NEXT: kshiftrb $6, %k0, %k1
+; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k1
; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX_32-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1}
; SKX_32-NEXT: vzeroupper
; SKX: # BB#0:
; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
-; SKX-NEXT: vptestmq %xmm1, %xmm1, %k0
-; SKX-NEXT: kshiftlb $6, %k0, %k0
-; SKX-NEXT: kshiftrb $6, %k0, %k1
+; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX-NEXT: vgatherdps (%rdi,%xmm0,4), %xmm2 {%k1}
; SKX-NEXT: vmovaps %xmm2, %xmm0
; SKX-NEXT: retq
; SKX_32: # BB#0:
; SKX_32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
-; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k0
-; SKX_32-NEXT: kshiftlb $6, %k0, %k0
-; SKX_32-NEXT: kshiftrb $6, %k0, %k1
+; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vgatherdps (%eax,%xmm0,4), %xmm2 {%k1}
; SKX_32-NEXT: vmovaps %xmm2, %xmm0
; AVX512F-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
-; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kshiftlw $8, %k0, %k0
-; AVX512F-NEXT: kshiftrw $8, %k0, %k1
+; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT: vpblendmd (%rdi), %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512F-NEXT: retq
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
-; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kshiftlw $8, %k0, %k0
-; AVX512F-NEXT: kshiftrw $8, %k0, %k1
+; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT: vmovups (%rdi), %zmm0 {%k1} {z}
; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512F-NEXT: retq
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
-; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kshiftlw $8, %k0, %k0
-; AVX512F-NEXT: kshiftrw $8, %k0, %k1
+; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512F-NEXT: retq
;
; KNL-32-LABEL: pr25080:
; KNL-32: # BB#0: # %entry
-; KNL-32-NEXT: vpbroadcastd {{.*#+}} ymm1 = [8388607,8388607,8388607,8388607,8388607,8388607,8388607,8388607]
-; KNL-32-NEXT: vpand %ymm1, %ymm0, %ymm0
-; KNL-32-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL-32-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; KNL-32-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; KNL-32-NEXT: vbroadcastss {{.*#+}} ymm1 = [8388607,8388607,8388607,8388607,8388607,8388607,8388607,8388607]
+; KNL-32-NEXT: vptestnmd %zmm1, %zmm0, %k0
; KNL-32-NEXT: movb $15, %al
; KNL-32-NEXT: kmovw %eax, %k1
; KNL-32-NEXT: korw %k1, %k0, %k1