From: Craig Topper
Date: Fri, 8 Dec 2017 00:16:09 +0000 (+0000)
Subject: [X86] Handle all versions of vXi1 insert_vector_elt with a constant index without...
X-Git-Tag: android-x86-7.1-r4~7585
X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=b04a69212cba7a3b64c322af211aa9afde0eceb3;p=android-x86%2Fexternal-llvm.git

[X86] Handle all versions of vXi1 insert_vector_elt with a constant index
without falling back to shuffles.

We previously only supported inserting into the LSB or MSB, where it was easy
to zero the destination bit and perform an OR to insert the new value.

This change effectively extracts the old value and the new value, xors them
together, and then xors that single bit with the correct location in the
original vector. The old value picked up by the first xor cancels out in the
final xor, leaving the new value in that position.

The way I've implemented this uses three shifts and two xors and requires an
additional register. We can avoid the additional register at the cost of
another shift.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@320120 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index da3e319f55f..6b2a43db570 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -14699,21 +14699,14 @@ static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
   // If the kshift instructions of the correct width aren't natively supported
   // then we need to promote the vector to the native size to get the correct
   // zeroing behavior.
-  bool HasNativeShift = true;
   if ((!Subtarget.hasDQI() && NumElems == 8) || (NumElems < 8)) {
-    HasNativeShift = false;
-    // For now don't do this if we are going to end up using the shuffle
-    // below. This minimizes test diffs.
-    // TODO: Remove this restriction once we no longer need a shuffle fallback.
-    if (Vec.isUndef() || IdxVal == 0) {
-      // Need to promote to v16i1, do the insert, then extract back.
-      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
-                        DAG.getUNDEF(MVT::v16i1), Vec,
-                        DAG.getIntPtrConstant(0, dl));
-      Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i1, Vec, Elt, Idx);
-      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Op,
-                         DAG.getIntPtrConstant(0, dl));
-    }
+    // Need to promote to v16i1, do the insert, then extract back.
+    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
+                      DAG.getUNDEF(MVT::v16i1), Vec,
+                      DAG.getIntPtrConstant(0, dl));
+    Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i1, Vec, Elt, Idx);
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Op,
+                       DAG.getIntPtrConstant(0, dl));
   }
 
   SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt);
@@ -14741,7 +14734,7 @@ static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
     return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
   }
   // Insertion of one bit into last position
-  if (HasNativeShift && IdxVal == NumElems - 1) {
+  if (IdxVal == NumElems - 1) {
     // Move the bit to the last position inside the vector.
     EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
                            DAG.getConstant(IdxVal, dl, MVT::i8));
@@ -14754,12 +14747,20 @@ static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
     return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
   }
 
-  // Use shuffle to insert element.
-  SmallVector MaskVec(NumElems);
-  for (unsigned i = 0; i != NumElems; ++i)
-    MaskVec[i] = (i == IdxVal) ? NumElems : i;
-
-  return DAG.getVectorShuffle(VecVT, dl, Vec, EltInVec, MaskVec);
+  // Move the current value of the bit to be replaced to bit 0.
+ SDValue Merged = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec, + DAG.getConstant(IdxVal, dl, MVT::i8)); + // Xor with the new bit. + Merged = DAG.getNode(ISD::XOR, dl, VecVT, Merged, EltInVec); + // Shift to MSB, filling bottom bits with 0. + Merged = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Merged, + DAG.getConstant(NumElems - 1, dl, MVT::i8)); + // Shift to the final position, filling upper bits with 0. + Merged = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Merged, + DAG.getConstant(NumElems - 1 - IdxVal, dl, MVT::i8)); + // Xor with original vector to cancel out the original bit value that's still + // present. + return DAG.getNode(ISD::XOR, dl, VecVT, Merged, Vec); } SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, diff --git a/test/CodeGen/X86/avx512-insert-extract.ll b/test/CodeGen/X86/avx512-insert-extract.ll index a5abfe0cad2..68a75d62d66 100644 --- a/test/CodeGen/X86/avx512-insert-extract.ll +++ b/test/CodeGen/X86/avx512-insert-extract.ll @@ -309,31 +309,28 @@ define i16 @test16(i1 *%addr, i16 %a) { ; KNL-LABEL: test16: ; KNL: ## %bb.0: ; KNL-NEXT: movb (%rdi), %al -; KNL-NEXT: kmovw %esi, %k1 -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} -; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; KNL-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,16,11,12,13,14,15] -; KNL-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; KNL-NEXT: vpslld $31, %zmm2, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kmovw %esi, %k0 +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kshiftrw $10, %k0, %k2 +; KNL-NEXT: kxorw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $5, %k1, %k1 +; KNL-NEXT: kxorw %k0, %k1, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: def %ax killed %ax killed %eax -; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test16: ; SKX: ## %bb.0: ; SKX-NEXT: kmovb (%rdi), %k0 ; SKX-NEXT: kmovd %esi, %k1 -; SKX-NEXT: vpmovm2d %k0, %zmm0 -; SKX-NEXT: vpmovm2d %k1, %zmm1 -; SKX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,16,11,12,13,14,15] -; SKX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; SKX-NEXT: vpmovd2m %zmm2, %k0 +; SKX-NEXT: kshiftrw $10, %k1, %k2 +; SKX-NEXT: kxorw %k0, %k2, %k0 +; SKX-NEXT: kshiftlw $15, %k0, %k0 +; SKX-NEXT: kshiftrw $5, %k0, %k0 +; SKX-NEXT: kxorw %k1, %k0, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: ## kill: def %ax killed %ax killed %eax -; SKX-NEXT: vzeroupper ; SKX-NEXT: retq %x = load i1 , i1 * %addr, align 128 %a1 = bitcast i16 %a to <16 x i1> @@ -346,31 +343,28 @@ define i8 @test17(i1 *%addr, i8 %a) { ; KNL-LABEL: test17: ; KNL: ## %bb.0: ; KNL-NEXT: movb (%rdi), %al -; KNL-NEXT: kmovw %esi, %k1 -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z} -; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,8,5,6,7] -; KNL-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; KNL-NEXT: vpsllq $63, %zmm2, %zmm0 -; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 +; KNL-NEXT: kmovw %esi, %k0 +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kshiftrw $4, %k0, %k2 +; KNL-NEXT: kxorw %k1, %k2, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $11, %k1, %k1 +; KNL-NEXT: kxorw %k0, %k1, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: def %al killed %al killed %eax -; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test17: ; SKX: ## %bb.0: ; SKX-NEXT: kmovb (%rdi), %k0 ; SKX-NEXT: kmovd %esi, %k1 -; SKX-NEXT: vpmovm2q %k0, %zmm0 -; SKX-NEXT: vpmovm2q %k1, %zmm1 -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = 
[0,1,2,3,8,5,6,7] -; SKX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; SKX-NEXT: vpmovq2m %zmm2, %k0 +; SKX-NEXT: kshiftrb $4, %k1, %k2 +; SKX-NEXT: kxorb %k0, %k2, %k0 +; SKX-NEXT: kshiftlb $7, %k0, %k0 +; SKX-NEXT: kshiftrb $3, %k0, %k0 +; SKX-NEXT: kxorb %k1, %k0, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: ## kill: def %al killed %al killed %eax -; SKX-NEXT: vzeroupper ; SKX-NEXT: retq %x = load i1 , i1 * %addr, align 128 %a1 = bitcast i8 %a to <8 x i1> @@ -962,12 +956,12 @@ define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32> ; SKX-NEXT: vpcmpltud %zmm2, %zmm0, %k0 ; SKX-NEXT: vpcmpltud %zmm3, %zmm1, %k1 ; SKX-NEXT: kunpckwd %k0, %k1, %k0 -; SKX-NEXT: vpmovm2w %k0, %zmm0 -; SKX-NEXT: kmovd %eax, %k0 -; SKX-NEXT: vpmovm2w %k0, %zmm1 -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,32,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31] -; SKX-NEXT: vpermi2w %zmm1, %zmm0, %zmm2 -; SKX-NEXT: vpmovw2m %zmm2, %k0 +; SKX-NEXT: kshiftrd $4, %k0, %k1 +; SKX-NEXT: kmovd %eax, %k2 +; SKX-NEXT: kxord %k2, %k1, %k1 +; SKX-NEXT: kshiftld $31, %k1, %k1 +; SKX-NEXT: kshiftrd $27, %k1, %k1 +; SKX-NEXT: kxord %k0, %k1, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq @@ -988,37 +982,33 @@ define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y) ; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; KNL-NEXT: vpextrb $4, %xmm0, %ecx -; KNL-NEXT: kmovw %ecx, %k1 -; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; KNL-NEXT: kmovw %ecx, %k0 ; KNL-NEXT: vpextrb $0, %xmm0, %ecx ; KNL-NEXT: andl $1, %ecx -; KNL-NEXT: kmovw %ecx, %k0 -; KNL-NEXT: kshiftrw $1, %k0, %k1 -; KNL-NEXT: kshiftlw $1, %k1, %k1 -; KNL-NEXT: korw %k0, %k1, %k1 -; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; KNL-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; KNL-NEXT: vpsllq $63, %zmm3, %zmm1 -; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 -; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; KNL-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; KNL-NEXT: vpsllq $63, %zmm3, %zmm1 -; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 -; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; KNL-NEXT: kmovw %ecx, %k1 +; KNL-NEXT: kshiftrw $1, %k0, %k2 +; KNL-NEXT: kshiftlw $1, %k2, %k2 +; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftrw $1, %k1, %k2 +; KNL-NEXT: kxorw %k0, %k2, %k0 +; KNL-NEXT: kshiftlw $15, %k0, %k0 +; KNL-NEXT: kshiftrw $14, %k0, %k0 +; KNL-NEXT: kxorw %k1, %k0, %k0 +; KNL-NEXT: kshiftrw $2, %k0, %k1 +; KNL-NEXT: kmovw %eax, %k2 +; KNL-NEXT: kxorw %k2, %k1, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $13, %k1, %k1 +; KNL-NEXT: kxorw %k0, %k1, %k0 +; KNL-NEXT: kshiftrw $3, %k0, %k1 ; KNL-NEXT: vpextrb $12, %xmm0, %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; KNL-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; KNL-NEXT: vpsllq $63, %zmm2, %zmm0 -; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 +; KNL-NEXT: kmovw %eax, %k2 +; KNL-NEXT: kxorw %k2, %k1, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $12, %k1, %k1 +; KNL-NEXT: kxorw %k0, %k1, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: def %al killed %al killed %eax -; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: 
test_iinsertelement_v4i1: @@ -1026,12 +1016,12 @@ define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y) ; SKX-NEXT: cmpl %esi, %edi ; SKX-NEXT: setb %al ; SKX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 -; SKX-NEXT: vpmovm2d %k0, %xmm0 -; SKX-NEXT: kmovd %eax, %k0 -; SKX-NEXT: vpmovm2d %k0, %xmm1 -; SKX-NEXT: vpbroadcastq %xmm1, %xmm1 -; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] -; SKX-NEXT: vpmovd2m %xmm0, %k0 +; SKX-NEXT: kshiftrw $2, %k0, %k1 +; SKX-NEXT: kmovd %eax, %k2 +; SKX-NEXT: kxorw %k2, %k1, %k1 +; SKX-NEXT: kshiftlw $15, %k1, %k1 +; SKX-NEXT: kshiftrw $13, %k1, %k1 +; SKX-NEXT: kxorw %k0, %k1, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: ## kill: def %al killed %al killed %eax ; SKX-NEXT: retq @@ -1057,17 +1047,15 @@ define i8 @test_iinsertelement_v2i1(i32 %a, i32 %b, <2 x i64> %x , <2 x i64> %y) ; KNL-NEXT: kmovw %ecx, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k1 ; KNL-NEXT: kshiftlw $1, %k1, %k1 -; KNL-NEXT: korw %k0, %k1, %k1 -; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; KNL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; KNL-NEXT: vpsllq $63, %zmm2, %zmm0 -; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 +; KNL-NEXT: korw %k0, %k1, %k0 +; KNL-NEXT: kshiftrw $1, %k0, %k1 +; KNL-NEXT: kmovw %eax, %k2 +; KNL-NEXT: kxorw %k2, %k1, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $14, %k1, %k1 +; KNL-NEXT: kxorw %k0, %k1, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: def %al killed %al killed %eax -; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test_iinsertelement_v2i1: @@ -1075,11 +1063,12 @@ define i8 @test_iinsertelement_v2i1(i32 %a, i32 %b, <2 x i64> %x , <2 x i64> %y) ; SKX-NEXT: cmpl %esi, %edi ; SKX-NEXT: setb %al ; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 -; SKX-NEXT: vpmovm2q %k0, %xmm0 -; SKX-NEXT: kmovd %eax, %k0 -; SKX-NEXT: vpmovm2q %k0, %xmm1 -; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; SKX-NEXT: vpmovq2m %xmm0, %k0 +; SKX-NEXT: kshiftrw $1, %k0, %k1 +; SKX-NEXT: kmovd %eax, %k2 +; SKX-NEXT: kxorw %k2, %k1, %k1 +; SKX-NEXT: kshiftlw $15, %k1, %k1 +; SKX-NEXT: kshiftrw $14, %k1, %k1 +; SKX-NEXT: kxorw %k0, %k1, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: ## kill: def %al killed %al killed %eax ; SKX-NEXT: retq diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll index d5a3f784af4..3a4075b9d86 100644 --- a/test/CodeGen/X86/avx512-mask-op.ll +++ b/test/CodeGen/X86/avx512-mask-op.ll @@ -972,14 +972,11 @@ define <64 x i8> @test16(i64 %x) { ; SKX-NEXT: kmovq %rdi, %k0 ; SKX-NEXT: movb $1, %al ; SKX-NEXT: kmovd %eax, %k1 -; SKX-NEXT: vpmovm2b %k1, %zmm0 -; SKX-NEXT: vpsllq $40, %xmm0, %xmm0 -; SKX-NEXT: vpmovm2b %k0, %zmm1 -; SKX-NEXT: movl $32, %eax -; SKX-NEXT: kmovd %eax, %k1 -; SKX-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; SKX-NEXT: vpmovb2m %zmm0, %k0 +; SKX-NEXT: kshiftrq $5, %k0, %k2 +; SKX-NEXT: kxorq %k1, %k2, %k1 +; SKX-NEXT: kshiftlq $63, %k1, %k1 +; SKX-NEXT: kshiftrq $58, %k1, %k1 +; SKX-NEXT: kxorq %k0, %k1, %k0 ; SKX-NEXT: vpmovm2b %k0, %zmm0 ; SKX-NEXT: retq ; @@ -988,13 +985,11 @@ define <64 x i8> @test16(i64 %x) { ; AVX512BW-NEXT: kmovq %rdi, %k0 ; AVX512BW-NEXT: movb $1, %al ; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: vpmovm2b %k1, %zmm0 -; AVX512BW-NEXT: vpsllq $40, %xmm0, %xmm0 -; AVX512BW-NEXT: vpmovm2b %k0, %zmm1 -; AVX512BW-NEXT: 
vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512BW-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 +; AVX512BW-NEXT: kshiftrq $5, %k0, %k2 +; AVX512BW-NEXT: kxorq %k1, %k2, %k1 +; AVX512BW-NEXT: kshiftlq $63, %k1, %k1 +; AVX512BW-NEXT: kshiftrq $58, %k1, %k1 +; AVX512BW-NEXT: kxorq %k0, %k1, %k0 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 ; AVX512BW-NEXT: retq ; @@ -1085,14 +1080,11 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) { ; SKX-NEXT: cmpl %edx, %esi ; SKX-NEXT: setg %al ; SKX-NEXT: kmovd %eax, %k1 -; SKX-NEXT: vpmovm2b %k1, %zmm0 -; SKX-NEXT: vpsllq $40, %xmm0, %xmm0 -; SKX-NEXT: vpmovm2b %k0, %zmm1 -; SKX-NEXT: movl $32, %eax -; SKX-NEXT: kmovd %eax, %k1 -; SKX-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; SKX-NEXT: vpmovb2m %zmm0, %k0 +; SKX-NEXT: kshiftrq $5, %k0, %k2 +; SKX-NEXT: kxorq %k1, %k2, %k1 +; SKX-NEXT: kshiftlq $63, %k1, %k1 +; SKX-NEXT: kshiftrq $58, %k1, %k1 +; SKX-NEXT: kxorq %k0, %k1, %k0 ; SKX-NEXT: vpmovm2b %k0, %zmm0 ; SKX-NEXT: retq ; @@ -1102,13 +1094,11 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) { ; AVX512BW-NEXT: cmpl %edx, %esi ; AVX512BW-NEXT: setg %al ; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: vpmovm2b %k1, %zmm0 -; AVX512BW-NEXT: vpsllq $40, %xmm0, %xmm0 -; AVX512BW-NEXT: vpmovm2b %k0, %zmm1 -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512BW-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 +; AVX512BW-NEXT: kshiftrq $5, %k0, %k2 +; AVX512BW-NEXT: kxorq %k1, %k2, %k1 +; AVX512BW-NEXT: kshiftlq $63, %k1, %k1 +; AVX512BW-NEXT: kshiftrq $58, %k1, %k1 +; AVX512BW-NEXT: kxorq %k0, %k1, %k0 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 ; AVX512BW-NEXT: retq ; @@ -1159,24 +1149,22 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) { define <8 x i1> @test18(i8 %a, i16 %y) { ; KNL-LABEL: test18: ; KNL: ## %bb.0: -; KNL-NEXT: kmovw %edi, %k2 -; KNL-NEXT: kmovw %esi, %k0 -; KNL-NEXT: kshiftlw $7, %k0, %k1 +; KNL-NEXT: kmovw %edi, %k0 +; KNL-NEXT: kmovw %esi, %k1 +; KNL-NEXT: kshiftlw $7, %k1, %k2 +; KNL-NEXT: kshiftrw $15, %k2, %k2 +; KNL-NEXT: kshiftlw $6, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kshiftlw $6, %k0, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k3 -; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z} -; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k3} {z} -; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7] -; KNL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; KNL-NEXT: vpsllq $63, %zmm2, %zmm0 -; KNL-NEXT: vptestmq %zmm0, %zmm0, %k2 -; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z} -; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,8] -; KNL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; KNL-NEXT: vpsllq $63, %zmm2, %zmm0 -; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 +; KNL-NEXT: kshiftrw $6, %k0, %k3 +; KNL-NEXT: kxorw %k1, %k3, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $9, %k1, %k1 +; KNL-NEXT: kxorw %k0, %k1, %k0 +; KNL-NEXT: kshiftrw $7, %k0, %k1 +; KNL-NEXT: kxorw %k2, %k1, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw 
$8, %k1, %k1 +; KNL-NEXT: kxorw %k0, %k1, %k1 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdw %zmm0, %ymm0 ; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %ymm0 @@ -1185,45 +1173,42 @@ define <8 x i1> @test18(i8 %a, i16 %y) { ; ; SKX-LABEL: test18: ; SKX: ## %bb.0: -; SKX-NEXT: kmovd %edi, %k1 -; SKX-NEXT: kmovd %esi, %k2 -; SKX-NEXT: kshiftlw $7, %k2, %k0 -; SKX-NEXT: kshiftrw $15, %k0, %k0 -; SKX-NEXT: kshiftlw $6, %k2, %k2 +; SKX-NEXT: kmovd %edi, %k0 +; SKX-NEXT: kmovd %esi, %k1 +; SKX-NEXT: kshiftlw $7, %k1, %k2 ; SKX-NEXT: kshiftrw $15, %k2, %k2 -; SKX-NEXT: vpmovm2q %k1, %zmm0 -; SKX-NEXT: vpmovm2q %k2, %zmm1 -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7] -; SKX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; SKX-NEXT: vpmovq2m %zmm2, %k1 -; SKX-NEXT: kshiftlb $1, %k1, %k1 +; SKX-NEXT: kshiftlw $6, %k1, %k1 +; SKX-NEXT: kshiftrw $15, %k1, %k1 +; SKX-NEXT: kshiftrb $6, %k0, %k3 +; SKX-NEXT: kxorb %k1, %k3, %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 ; SKX-NEXT: kshiftrb $1, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k0, %k0 -; SKX-NEXT: korb %k0, %k1, %k0 +; SKX-NEXT: kxorb %k0, %k1, %k0 +; SKX-NEXT: kshiftlb $1, %k0, %k0 +; SKX-NEXT: kshiftrb $1, %k0, %k0 +; SKX-NEXT: kshiftlb $7, %k2, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 ; SKX-NEXT: vpmovm2w %k0, %xmm0 -; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test18: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: kmovd %edi, %k2 -; AVX512BW-NEXT: kmovd %esi, %k0 -; AVX512BW-NEXT: kshiftlw $7, %k0, %k1 +; AVX512BW-NEXT: kmovd %edi, %k0 +; AVX512BW-NEXT: kmovd %esi, %k1 +; AVX512BW-NEXT: kshiftlw $7, %k1, %k2 +; AVX512BW-NEXT: kshiftrw $15, %k2, %k2 +; AVX512BW-NEXT: kshiftlw $6, %k1, %k1 ; AVX512BW-NEXT: kshiftrw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $6, %k0, %k0 -; AVX512BW-NEXT: kshiftrw $15, %k0, %k3 -; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z} -; AVX512BW-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k3} {z} -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7] -; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vpsllq $63, %zmm2, %zmm0 -; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k2 -; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z} -; AVX512BW-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,8] -; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vpsllq $63, %zmm2, %zmm0 -; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: kshiftrw $6, %k0, %k3 +; AVX512BW-NEXT: kxorw %k1, %k3, %k1 +; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 +; AVX512BW-NEXT: kshiftrw $9, %k1, %k1 +; AVX512BW-NEXT: kxorw %k0, %k1, %k0 +; AVX512BW-NEXT: kshiftrw $7, %k0, %k1 +; AVX512BW-NEXT: kxorw %k2, %k1, %k1 +; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 +; AVX512BW-NEXT: kshiftrw $8, %k1, %k1 +; AVX512BW-NEXT: kxorw %k0, %k1, %k0 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 ; AVX512BW-NEXT: vzeroupper @@ -1231,21 +1216,21 @@ define <8 x i1> @test18(i8 %a, i16 %y) { ; ; AVX512DQ-LABEL: test18: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: kmovw %edi, %k1 -; AVX512DQ-NEXT: kmovw %esi, %k2 -; AVX512DQ-NEXT: kshiftlw $7, %k2, %k0 -; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0 -; AVX512DQ-NEXT: kshiftlw $6, %k2, %k2 +; AVX512DQ-NEXT: kmovw %edi, %k0 +; AVX512DQ-NEXT: kmovw %esi, %k1 +; AVX512DQ-NEXT: kshiftlw $7, %k1, %k2 ; AVX512DQ-NEXT: kshiftrw $15, %k2, %k2 -; AVX512DQ-NEXT: vpmovm2q %k1, %zmm0 -; AVX512DQ-NEXT: vpmovm2q %k2, %zmm1 -; AVX512DQ-NEXT: vmovdqa64 {{.*#+}} 
zmm2 = [0,1,2,3,4,5,8,7] -; AVX512DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; AVX512DQ-NEXT: vpmovq2m %zmm2, %k1 -; AVX512DQ-NEXT: kshiftlb $1, %k1, %k1 +; AVX512DQ-NEXT: kshiftlw $6, %k1, %k1 +; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1 +; AVX512DQ-NEXT: kshiftrb $6, %k0, %k3 +; AVX512DQ-NEXT: kxorb %k1, %k3, %k1 +; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 ; AVX512DQ-NEXT: kshiftrb $1, %k1, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k0, %k0 -; AVX512DQ-NEXT: korb %k0, %k1, %k0 +; AVX512DQ-NEXT: kxorb %k0, %k1, %k0 +; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0 +; AVX512DQ-NEXT: kshiftrb $1, %k0, %k0 +; AVX512DQ-NEXT: kshiftlb $7, %k2, %k1 +; AVX512DQ-NEXT: korb %k1, %k0, %k0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 ; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 killed %ymm0 diff --git a/test/CodeGen/X86/avx512-schedule.ll b/test/CodeGen/X86/avx512-schedule.ll index 2f9f631c7e1..5488cfc38ce 100755 --- a/test/CodeGen/X86/avx512-schedule.ll +++ b/test/CodeGen/X86/avx512-schedule.ll @@ -7325,14 +7325,11 @@ define <64 x i8> @vmov_test16(i64 %x) { ; GENERIC-NEXT: kmovq %rdi, %k0 # sched: [1:0.33] ; GENERIC-NEXT: movb $1, %al # sched: [1:0.33] ; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovm2b %k1, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovm2b %k0, %zmm1 # sched: [1:0.33] -; GENERIC-NEXT: movl $32, %eax # sched: [1:0.33] -; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [2:1.00] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:0.33] +; GENERIC-NEXT: kshiftrq $5, %k0, %k2 # sched: [1:1.00] +; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: kshiftlq $63, %k1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: kshiftrq $58, %k1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: kxorq %k0, %k1, %k0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7341,14 +7338,11 @@ define <64 x i8> @vmov_test16(i64 %x) { ; SKX-NEXT: kmovq %rdi, %k0 # sched: [1:1.00] ; SKX-NEXT: movb $1, %al # sched: [1:0.25] ; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovm2b %k1, %zmm0 # sched: [1:0.25] -; SKX-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovm2b %k0, %zmm1 # sched: [1:0.25] -; SKX-NEXT: movl $32, %eax # sched: [1:0.25] -; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00] -; SKX-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33] -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [3:1.00] -; SKX-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:1.00] +; SKX-NEXT: kshiftrq $5, %k0, %k2 # sched: [3:1.00] +; SKX-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00] +; SKX-NEXT: kshiftlq $63, %k1, %k1 # sched: [3:1.00] +; SKX-NEXT: kshiftrq $58, %k1, %k1 # sched: [3:1.00] +; SKX-NEXT: kxorq %k0, %k1, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] %a = bitcast i64 %x to <64 x i1> @@ -7365,14 +7359,11 @@ define <64 x i8> @vmov_test17(i64 %x, i32 %y, i32 %z) { ; GENERIC-NEXT: cmpl %edx, %esi # sched: [1:0.33] ; GENERIC-NEXT: setg %al # sched: [1:0.50] ; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovm2b %k1, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovm2b %k0, %zmm1 # sched: [1:0.33] -; GENERIC-NEXT: movl $32, %eax # sched: 
[1:0.33] -; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [2:1.00] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:0.33] +; GENERIC-NEXT: kshiftrq $5, %k0, %k2 # sched: [1:1.00] +; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: kshiftlq $63, %k1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: kshiftrq $58, %k1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: kxorq %k0, %k1, %k0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7382,14 +7373,11 @@ define <64 x i8> @vmov_test17(i64 %x, i32 %y, i32 %z) { ; SKX-NEXT: cmpl %edx, %esi # sched: [1:0.25] ; SKX-NEXT: setg %al # sched: [1:0.50] ; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovm2b %k1, %zmm0 # sched: [1:0.25] -; SKX-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovm2b %k0, %zmm1 # sched: [1:0.25] -; SKX-NEXT: movl $32, %eax # sched: [1:0.25] -; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00] -; SKX-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33] -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [3:1.00] -; SKX-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:1.00] +; SKX-NEXT: kshiftrq $5, %k0, %k2 # sched: [3:1.00] +; SKX-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00] +; SKX-NEXT: kshiftlq $63, %k1, %k1 # sched: [3:1.00] +; SKX-NEXT: kshiftrq $58, %k1, %k1 # sched: [3:1.00] +; SKX-NEXT: kxorq %k0, %k1, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] %a = bitcast i64 %x to <64 x i1> @@ -7402,44 +7390,42 @@ define <64 x i8> @vmov_test17(i64 %x, i32 %y, i32 %z) { define <8 x i1> @vmov_test18(i8 %a, i16 %y) { ; GENERIC-LABEL: vmov_test18: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: kmovd %esi, %k2 # sched: [1:0.33] -; GENERIC-NEXT: kshiftlw $7, %k2, %k0 # sched: [1:1.00] -; GENERIC-NEXT: kshiftrw $15, %k0, %k0 # sched: [1:1.00] -; GENERIC-NEXT: kshiftlw $6, %k2, %k2 # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] +; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] +; GENERIC-NEXT: kshiftlw $7, %k1, %k2 # sched: [1:1.00] ; GENERIC-NEXT: kshiftrw $15, %k2, %k2 # sched: [1:1.00] -; GENERIC-NEXT: vpmovm2q %k1, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: vpmovm2q %k2, %zmm1 # sched: [1:0.33] -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7] sched: [4:0.50] -; GENERIC-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 # sched: [1:1.00] -; GENERIC-NEXT: vpmovq2m %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: kshiftlb $1, %k1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: kshiftlw $6, %k1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: kshiftrw $15, %k1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: kshiftrb $6, %k0, %k3 # sched: [1:1.00] +; GENERIC-NEXT: kxorb %k1, %k3, %k1 # sched: [1:1.00] +; GENERIC-NEXT: kshiftlb $7, %k1, %k1 # sched: [1:1.00] ; GENERIC-NEXT: kshiftrb $1, %k1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: kshiftlb $7, %k0, %k0 # sched: [1:1.00] -; GENERIC-NEXT: korb %k0, %k1, %k0 # sched: [1:1.00] +; GENERIC-NEXT: kxorb %k0, %k1, %k0 # sched: [1:1.00] +; GENERIC-NEXT: kshiftlb $1, %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: kshiftrb $1, %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: kshiftlb $7, %k2, %k1 # sched: [1:1.00] +; GENERIC-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: vzeroupper ; GENERIC-NEXT: 
retq # sched: [1:1.00] ; ; SKX-LABEL: vmov_test18: ; SKX: # %bb.0: -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: kmovd %esi, %k2 # sched: [1:1.00] -; SKX-NEXT: kshiftlw $7, %k2, %k0 # sched: [3:1.00] -; SKX-NEXT: kshiftrw $15, %k0, %k0 # sched: [3:1.00] -; SKX-NEXT: kshiftlw $6, %k2, %k2 # sched: [3:1.00] +; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] +; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] +; SKX-NEXT: kshiftlw $7, %k1, %k2 # sched: [3:1.00] ; SKX-NEXT: kshiftrw $15, %k2, %k2 # sched: [3:1.00] -; SKX-NEXT: vpmovm2q %k1, %zmm0 # sched: [1:0.25] -; SKX-NEXT: vpmovm2q %k2, %zmm1 # sched: [1:0.25] -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7] sched: [8:0.50] -; SKX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 # sched: [3:1.00] -; SKX-NEXT: vpmovq2m %zmm2, %k1 # sched: [1:1.00] -; SKX-NEXT: kshiftlb $1, %k1, %k1 # sched: [3:1.00] +; SKX-NEXT: kshiftlw $6, %k1, %k1 # sched: [3:1.00] +; SKX-NEXT: kshiftrw $15, %k1, %k1 # sched: [3:1.00] +; SKX-NEXT: kshiftrb $6, %k0, %k3 # sched: [3:1.00] +; SKX-NEXT: kxorb %k1, %k3, %k1 # sched: [1:1.00] +; SKX-NEXT: kshiftlb $7, %k1, %k1 # sched: [3:1.00] ; SKX-NEXT: kshiftrb $1, %k1, %k1 # sched: [3:1.00] -; SKX-NEXT: kshiftlb $7, %k0, %k0 # sched: [3:1.00] -; SKX-NEXT: korb %k0, %k1, %k0 # sched: [1:1.00] +; SKX-NEXT: kxorb %k0, %k1, %k0 # sched: [1:1.00] +; SKX-NEXT: kshiftlb $1, %k0, %k0 # sched: [3:1.00] +; SKX-NEXT: kshiftrb $1, %k0, %k0 # sched: [3:1.00] +; SKX-NEXT: kshiftlb $7, %k2, %k1 # sched: [3:1.00] +; SKX-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25] -; SKX-NEXT: vzeroupper # sched: [4:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %b = bitcast i8 %a to <8 x i1> %b1 = bitcast i16 %y to <16 x i1> diff --git a/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll b/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll index a0b13fa16b1..1e754be6fe4 100644 --- a/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll +++ b/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll @@ -120,713 +120,537 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %ebx ; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: pushl %esi +; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: .cfi_offset %esi, -12 ; X32-NEXT: .cfi_offset %ebx, -8 -; X32-NEXT: vmovdqa64 %zmm0, %zmm3 -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andb $2, %cl -; X32-NEXT: shrb %cl +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vpsllw $8, %xmm1, %xmm1 -; X32-NEXT: kmovd %eax, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %edx +; X32-NEXT: kshiftrq $1, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: andb $2, %al +; X32-NEXT: shrb %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $62, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $2, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: andb $15, %al +; X32-NEXT: movl %eax, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k0 
-; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vpbroadcastw %xmm2, %xmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm2 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm2[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vpslld $24, %xmm2, %xmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm2 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm2[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $4, %cl -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vpbroadcastd %xmm2, %xmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm2 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm2[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $5, %cl -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vpsllq $40, %xmm2, %xmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm2 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm2[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $6, %cl -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vpbroadcastw %xmm2, %xmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm2 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm2[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $7, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vpsllq $56, %xmm1, %xmm1 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 -; X32-NEXT: movb %ah, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vpbroadcastq %xmm1, %xmm1 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 -; X32-NEXT: andb $2, %cl -; X32-NEXT: shrb %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vpslldq {{.*#+}} xmm1 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6] -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 -; X32-NEXT: movb %ah, %cl -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %edx +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $61, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $3, %k0, %k1 +; X32-NEXT: shrb $3, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $60, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $4, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrb $4, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $59, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $5, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrb $5, %al +; X32-NEXT: andb $1, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $58, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $6, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrb $6, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $57, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $7, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrb $7, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $56, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $8, %k0, %k1 +; X32-NEXT: movb %ch, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $55, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $9, %k0, %k1 +; X32-NEXT: andb $2, %al +; X32-NEXT: shrb %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $54, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $10, %k0, %k1 +; X32-NEXT: movb %ch, %al +; X32-NEXT: andb $15, %al +; X32-NEXT: movl %eax, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vpbroadcastw %xmm1, %xmm1 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4] -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7] -; X32-NEXT: vpmovb2m 
%zmm1, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $12, %ecx -; X32-NEXT: andl $15, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vpbroadcastd %xmm1, %xmm1 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $13, %ecx -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2] -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $14, %ecx -; X32-NEXT: andl $3, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vpbroadcastw %xmm1, %xmm1 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $15, %ecx -; X32-NEXT: andl $1, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0] -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $16, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: andb $2, %dl -; X32-NEXT: shrb %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vpsllw $8, %xmm1, %xmm1 -; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $53, %k1, %k1 +; X32-NEXT: kxorq %k0, 
%k1, %k0 +; X32-NEXT: kshiftrq $11, %k0, %k1 +; X32-NEXT: shrb $3, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrl $12, %eax +; X32-NEXT: andl $15, %eax +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrl $13, %eax +; X32-NEXT: andb $1, %al +; X32-NEXT: kmovd %eax, %k3 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrl $14, %eax +; X32-NEXT: andl $3, %eax +; X32-NEXT: kmovd %eax, %k4 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrl $15, %eax +; X32-NEXT: andl $1, %eax +; X32-NEXT: kmovd %eax, %k5 +; X32-NEXT: movl %ecx, %edx +; X32-NEXT: shrl $16, %edx +; X32-NEXT: movl %edx, %eax +; X32-NEXT: andb $2, %al +; X32-NEXT: shrb %al +; X32-NEXT: kmovd %eax, %k6 +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: andb $15, %bl +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: shrb $2, %al +; X32-NEXT: kmovd %eax, %k7 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $52, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $12, %k0, %k1 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $51, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $13, %k0, %k1 +; X32-NEXT: kxorq %k3, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $50, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $14, %k0, %k1 +; X32-NEXT: kxorq %k4, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $49, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $15, %k0, %k1 +; X32-NEXT: kxorq %k5, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $48, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $16, %k0, %k1 +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $47, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $17, %k0, %k1 +; X32-NEXT: kxorq %k6, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $46, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $18, %k0, %k1 +; X32-NEXT: kxorq %k7, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $45, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $19, %k0, %k1 +; X32-NEXT: shrb $3, %bl +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $44, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $20, %k0, %k1 +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: shrb $4, %bl +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $43, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $21, %k0, %k1 +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: shrb $5, %bl +; X32-NEXT: andb $1, %bl +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $42, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $22, %k0, %k1 +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: shrb $6, %bl +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $41, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $23, %k0, %k1 +; X32-NEXT: shrb $7, %dl +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $40, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: 
kshiftrq $24, %k0, %k1 ; X32-NEXT: movl %ecx, %edx +; X32-NEXT: shrl $24, %edx +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $39, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $25, %k0, %k1 +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: andb $2, %bl +; X32-NEXT: shrb %bl +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $38, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $26, %k0, %k1 ; X32-NEXT: andb $15, %dl ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: shrb $2, %bl -; X32-NEXT: kmovd %ebx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vpbroadcastw %xmm1, %xmm1 -; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $37, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $27, %k0, %k1 ; X32-NEXT: shrb $3, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vpslld $24, %xmm1, %xmm1 -; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $36, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $28, %k0, %k1 ; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $4, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vpbroadcastd %xmm1, %xmm1 -; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 +; X32-NEXT: shrl $28, %edx +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $35, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $29, %k0, %k1 ; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $5, %dl +; X32-NEXT: shrl $29, %edx ; X32-NEXT: andb $1, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vpsllq $40, %xmm1, %xmm1 -; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $6, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: 
vpmovm2b %k1, %zmm1 -; X32-NEXT: vpbroadcastw %xmm1, %xmm1 -; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 -; X32-NEXT: shrb $7, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vpsllq $56, %xmm1, %xmm1 -; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $24, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vpbroadcastq %xmm1, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: andb $2, %dl -; X32-NEXT: shrb %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6] -; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm5, %ymm2, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 -; X32-NEXT: andb $15, %cl +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $34, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $30, %k0, %k1 ; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vpbroadcastw %xmm1, %xmm1 -; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm4, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm4[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4] -; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm4, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm4[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl 
$28, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vpbroadcastd %xmm1, %xmm1 -; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] -; X32-NEXT: vpblendvb %ymm0, %ymm4, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm4[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm1, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $29, %ecx -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2] -; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] -; X32-NEXT: vpblendvb %ymm4, %ymm0, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $30, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] -; X32-NEXT: vpblendvb %ymm4, %ymm0, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: shrl $31, %eax -; X32-NEXT: kmovd %eax, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] -; X32-NEXT: vpblendvb %ymm7, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: kmovd %eax, %k1 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; X32-NEXT: vpmovm2b %k1, %zmm7 -; X32-NEXT: vmovdqa {{.*#+}} ymm6 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm6, %ymm1, %ymm7, %ymm1 -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: shrl $30, %edx +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $33, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $31, %k0, %k1 +; X32-NEXT: shrl $31, %ecx +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $32, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $32, %k0, %k1 +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $31, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $33, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: andb $2, %cl ; X32-NEXT: shrb %cl -; 
X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllw $8, %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $30, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $34, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: andb $15, %cl ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $29, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $35, %k0, %k1 ; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslld $24, %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $28, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $36, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrb $4, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastd %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $27, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $37, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrb $5, %cl ; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $40, %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; 
X32-NEXT: kshiftrq $26, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $38, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrb $6, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $25, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $39, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrb $7, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $56, %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $24, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $40, %k0, %k1 ; X32-NEXT: movb %ah, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastq %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: andb $2, %cl ; X32-NEXT: shrb %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 ; X32-NEXT: movb %ah, %cl ; X32-NEXT: andb $15, %cl ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k3 ; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4] -; 
X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $12, %ecx -; X32-NEXT: andl $15, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastd %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k4 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $13, %ecx ; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $14, %ecx -; X32-NEXT: andl $3, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $15, %ecx -; X32-NEXT: andl $1, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k5 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $16, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: andb $2, %dl ; X32-NEXT: shrb %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllw $8, %xmm0, %xmm0 -; X32-NEXT: 
vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k6 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: andb $15, %dl ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: shrb $2, %bl -; X32-NEXT: kmovd %ebx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ebx, %k7 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $23, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $41, %k0, %k1 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $22, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $42, %k0, %k1 +; X32-NEXT: kxorq %k3, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $21, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $43, %k0, %k1 +; X32-NEXT: kxorq %k4, %k1, %k1 +; X32-NEXT: movl %eax, %esi +; X32-NEXT: shrl $12, %esi +; X32-NEXT: andl $15, %esi +; X32-NEXT: kmovd %esi, %k2 +; X32-NEXT: movl %eax, %esi +; X32-NEXT: shrl $14, %esi +; X32-NEXT: andl $3, %esi +; X32-NEXT: kmovd %esi, %k3 +; X32-NEXT: movl %eax, %esi +; X32-NEXT: shrl $15, %esi +; X32-NEXT: andl $1, %esi +; X32-NEXT: kmovd %esi, %k4 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $20, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $44, %k0, %k1 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $19, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $45, %k0, %k1 +; X32-NEXT: kxorq %k5, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $18, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $46, %k0, %k1 +; X32-NEXT: kxorq %k3, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $17, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $47, %k0, %k1 +; X32-NEXT: kxorq %k4, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $16, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $48, %k0, %k1 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $15, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $49, %k0, %k1 +; X32-NEXT: kxorq %k6, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $14, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $50, %k0, %k1 +; X32-NEXT: kxorq %k7, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $13, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $51, %k0, %k1 ; X32-NEXT: shrb $3, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslld $24, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, 
%zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $12, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k4 +; X32-NEXT: kshiftrq $52, %k4, %k0 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $4, %dl ; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastd %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kxorq %k1, %k0, %k5 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $5, %dl ; X32-NEXT: andb $1, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $40, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k6 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $6, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k7 ; X32-NEXT: shrb $7, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $56, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k0 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $24, %ecx ; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastq %xmm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: andb 
$2, %dl ; X32-NEXT: shrb %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vpblendvb %ymm5, %ymm6, %ymm0, %ymm0 +; X32-NEXT: kmovd %edx, %k2 ; X32-NEXT: andb $15, %cl ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vpbroadcastw %xmm1, %xmm1 -; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm5 -; X32-NEXT: vpblendvb %ymm2, %ymm5, %ymm1, %ymm2 +; X32-NEXT: kmovd %edx, %k3 +; X32-NEXT: kshiftlq $63, %k5, %k5 +; X32-NEXT: kshiftrq $11, %k5, %k5 +; X32-NEXT: kxorq %k4, %k5, %k4 +; X32-NEXT: kshiftrq $53, %k4, %k5 +; X32-NEXT: kxorq %k6, %k5, %k5 +; X32-NEXT: kshiftlq $63, %k5, %k5 +; X32-NEXT: kshiftrq $10, %k5, %k5 +; X32-NEXT: kxorq %k4, %k5, %k5 +; X32-NEXT: kshiftrq $54, %k5, %k4 +; X32-NEXT: kxorq %k7, %k4, %k6 ; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k0 +; X32-NEXT: kmovd %ecx, %k4 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $29, %ecx ; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k2 -; X32-NEXT: vpmovm2b %k2, %zmm0 -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4] -; X32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255] -; X32-NEXT: vpblendvb %ymm5, %ymm1, %ymm2, %ymm1 -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; X32-NEXT: kmovd %ecx, %k7 +; X32-NEXT: kshiftlq $63, %k6, %k6 +; X32-NEXT: kshiftrq $9, %k6, %k6 +; X32-NEXT: kxorq %k5, %k6, %k5 +; X32-NEXT: kshiftrq $55, %k5, %k6 +; X32-NEXT: kxorq %k0, %k6, %k0 +; X32-NEXT: kshiftlq $63, %k0, %k0 +; X32-NEXT: kshiftrq $8, %k0, %k0 +; X32-NEXT: kxorq %k5, %k0, %k0 +; X32-NEXT: kshiftrq $56, %k0, %k5 +; X32-NEXT: kxorq %k1, %k5, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $28, %ecx -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vpbroadcastd %xmm2, %xmm2 -; X32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] -; X32-NEXT: vpblendvb %ymm5, %ymm1, %ymm2, %ymm1 -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; X32-NEXT: vpmovm2b %k1, %zmm2 -; X32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2] -; X32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] -; X32-NEXT: vpblendvb %ymm5, %ymm1, %ymm2, %ymm1 +; X32-NEXT: kmovd %ecx, %k5 ; X32-NEXT: movl %eax, %ecx 
; X32-NEXT: shrl $30, %ecx -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vpbroadcastw %xmm2, %xmm2 -; X32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm2, %ymm1 -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k6 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $7, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $57, %k0, %k1 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $6, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $58, %k0, %k1 +; X32-NEXT: kxorq %k3, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $5, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $59, %k0, %k1 +; X32-NEXT: kxorq %k4, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $4, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $60, %k0, %k1 +; X32-NEXT: kxorq %k5, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $3, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $61, %k0, %k1 +; X32-NEXT: kxorq %k7, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $2, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $62, %k0, %k1 +; X32-NEXT: kxorq %k6, %k1, %k1 ; X32-NEXT: shrl $31, %eax -; X32-NEXT: kmovd %eax, %k1 +; X32-NEXT: kmovd %eax, %k2 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $1, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftlq $1, %k0, %k0 ; X32-NEXT: kshiftrq $1, %k0, %k0 -; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k2, %k1 ; X32-NEXT: korq %k1, %k0, %k1 -; X32-NEXT: vpbroadcastb %eax, %zmm3 {%k1} -; X32-NEXT: vmovdqa64 %zmm3, %zmm0 +; X32-NEXT: vpbroadcastb %eax, %zmm0 {%k1} +; X32-NEXT: popl %esi ; X32-NEXT: popl %ebx ; X32-NEXT: retl ; @@ -850,710 +674,537 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) { ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %ebx ; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: pushl %esi +; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: .cfi_offset %esi, -12 ; X32-NEXT: .cfi_offset %ebx, -8 -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andb $2, %cl -; X32-NEXT: shrb %cl +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vpsllw $8, %xmm0, %xmm0 -; X32-NEXT: kmovd %eax, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %edx +; X32-NEXT: kshiftrq $1, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: andb $2, %al +; X32-NEXT: shrb %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $62, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $2, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: andb $15, %al 
+; X32-NEXT: movl %eax, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vpbroadcastw %xmm1, %xmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vpslld $24, %xmm1, %xmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $4, %cl -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vpbroadcastd %xmm1, %xmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $5, %cl -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vpsllq $40, %xmm1, %xmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $6, %cl -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vpbroadcastw %xmm1, %xmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $7, %cl -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vpsllq $56, %xmm1, %xmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movb %ah, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastq %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: andb $2, %cl -; X32-NEXT: shrb %cl -; X32-NEXT: kmovd %ecx, %k1 -; 
X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movb %ah, %cl -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %edx +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $61, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $3, %k0, %k1 +; X32-NEXT: shrb $3, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $60, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $4, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrb $4, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $59, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $5, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrb $5, %al +; X32-NEXT: andb $1, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $58, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $6, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrb $6, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $57, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $7, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrb $7, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $56, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $8, %k0, %k1 +; X32-NEXT: movb %ch, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $55, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $9, %k0, %k1 +; X32-NEXT: andb $2, %al +; X32-NEXT: shrb %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $54, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $10, %k0, %k1 +; X32-NEXT: movb %ch, %al +; X32-NEXT: andb $15, %al +; X32-NEXT: movl %eax, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 
{{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $12, %ecx -; X32-NEXT: andl $15, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastd %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $13, %ecx -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $14, %ecx -; X32-NEXT: andl $3, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $15, %ecx -; X32-NEXT: andl $1, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $16, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: andb $2, %dl -; X32-NEXT: shrb %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllw $8, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, 
%k1 +; X32-NEXT: kshiftrq $53, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $11, %k0, %k1 +; X32-NEXT: shrb $3, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrl $12, %eax +; X32-NEXT: andl $15, %eax +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrl $13, %eax +; X32-NEXT: andb $1, %al +; X32-NEXT: kmovd %eax, %k3 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrl $14, %eax +; X32-NEXT: andl $3, %eax +; X32-NEXT: kmovd %eax, %k4 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrl $15, %eax +; X32-NEXT: andl $1, %eax +; X32-NEXT: kmovd %eax, %k5 +; X32-NEXT: movl %ecx, %edx +; X32-NEXT: shrl $16, %edx +; X32-NEXT: movl %edx, %eax +; X32-NEXT: andb $2, %al +; X32-NEXT: shrb %al +; X32-NEXT: kmovd %eax, %k6 +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: andb $15, %bl +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: shrb $2, %al +; X32-NEXT: kmovd %eax, %k7 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $52, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $12, %k0, %k1 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $51, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $13, %k0, %k1 +; X32-NEXT: kxorq %k3, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $50, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $14, %k0, %k1 +; X32-NEXT: kxorq %k4, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $49, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $15, %k0, %k1 +; X32-NEXT: kxorq %k5, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $48, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $16, %k0, %k1 +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $47, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $17, %k0, %k1 +; X32-NEXT: kxorq %k6, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $46, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $18, %k0, %k1 +; X32-NEXT: kxorq %k7, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $45, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $19, %k0, %k1 +; X32-NEXT: shrb $3, %bl +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $44, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $20, %k0, %k1 +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: shrb $4, %bl +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $43, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $21, %k0, %k1 +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: shrb $5, %bl +; X32-NEXT: andb $1, %bl +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $42, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $22, %k0, %k1 +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: shrb $6, %bl +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $41, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $23, %k0, %k1 +; X32-NEXT: shrb $7, %dl +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: 
kshiftrq $40, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $24, %k0, %k1 ; X32-NEXT: movl %ecx, %edx +; X32-NEXT: shrl $24, %edx +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $39, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $25, %k0, %k1 +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: andb $2, %bl +; X32-NEXT: shrb %bl +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $38, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $26, %k0, %k1 ; X32-NEXT: andb $15, %dl ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: shrb $2, %bl -; X32-NEXT: kmovd %ebx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $37, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $27, %k0, %k1 ; X32-NEXT: shrb $3, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslld $24, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $4, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastd %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $5, %dl -; X32-NEXT: andb $1, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $40, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $6, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} 
zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: shrb $7, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $56, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm6 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm6, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $24, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastq %xmm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $36, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $28, %k0, %k1 ; X32-NEXT: movl %ecx, %edx -; X32-NEXT: andb $2, %dl -; X32-NEXT: shrb %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm5, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm4[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: andb $15, %cl +; X32-NEXT: shrl $28, %edx +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $35, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $29, %k0, %k1 ; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm4[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4] -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255] -; X32-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm4[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $28, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastd %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; 
X32-NEXT: vpmovm2b %k0, %zmm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] -; X32-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm4[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $29, %ecx -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2] -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] -; X32-NEXT: vpblendvb %ymm3, %ymm0, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $30, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] -; X32-NEXT: vpblendvb %ymm3, %ymm0, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: shrl $31, %eax -; X32-NEXT: kmovd %eax, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] -; X32-NEXT: vpblendvb %ymm7, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: kmovd %eax, %k1 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; X32-NEXT: vpmovm2b %k1, %zmm7 -; X32-NEXT: vmovdqa {{.*#+}} ymm4 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm7, %ymm1 -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: shrl $29, %edx +; X32-NEXT: andb $1, %dl +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $34, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $30, %k0, %k1 +; X32-NEXT: movl %ecx, %edx +; X32-NEXT: shrl $30, %edx +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $33, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $31, %k0, %k1 +; X32-NEXT: shrl $31, %ecx +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $32, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $32, %k0, %k1 +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $31, %k1, %k1 +; X32-NEXT: kxorq %k0, 
%k1, %k0 +; X32-NEXT: kshiftrq $33, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: andb $2, %cl ; X32-NEXT: shrb %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllw $8, %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $30, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $34, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: andb $15, %cl ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $29, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $35, %k0, %k1 ; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslld $24, %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $28, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $36, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrb $4, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastd %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $27, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $37, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrb $5, %cl ; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $40, %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: 
vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $26, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $38, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrb $6, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $25, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $39, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrb $7, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $56, %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $24, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $40, %k0, %k1 ; X32-NEXT: movb %ah, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastq %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: andb $2, %cl ; X32-NEXT: shrb %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 ; X32-NEXT: movb %ah, %cl ; X32-NEXT: andb $15, %cl ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k3 ; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, 
%zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $12, %ecx -; X32-NEXT: andl $15, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastd %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k4 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $13, %ecx ; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $14, %ecx -; X32-NEXT: andl $3, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $15, %ecx -; X32-NEXT: andl $1, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k5 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $16, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: andb $2, %dl ; X32-NEXT: shrb %dl 
-; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllw $8, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k6 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: andb $15, %dl ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: shrb $2, %bl -; X32-NEXT: kmovd %ebx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ebx, %k7 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $23, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $41, %k0, %k1 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $22, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $42, %k0, %k1 +; X32-NEXT: kxorq %k3, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $21, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $43, %k0, %k1 +; X32-NEXT: kxorq %k4, %k1, %k1 +; X32-NEXT: movl %eax, %esi +; X32-NEXT: shrl $12, %esi +; X32-NEXT: andl $15, %esi +; X32-NEXT: kmovd %esi, %k2 +; X32-NEXT: movl %eax, %esi +; X32-NEXT: shrl $14, %esi +; X32-NEXT: andl $3, %esi +; X32-NEXT: kmovd %esi, %k3 +; X32-NEXT: movl %eax, %esi +; X32-NEXT: shrl $15, %esi +; X32-NEXT: andl $1, %esi +; X32-NEXT: kmovd %esi, %k4 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $20, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $44, %k0, %k1 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $19, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $45, %k0, %k1 +; X32-NEXT: kxorq %k5, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $18, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $46, %k0, %k1 +; X32-NEXT: kxorq %k3, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $17, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $47, %k0, %k1 +; X32-NEXT: kxorq %k4, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $16, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $48, %k0, %k1 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $15, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $49, %k0, %k1 +; X32-NEXT: kxorq %k6, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $14, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $50, %k0, %k1 +; X32-NEXT: kxorq %k7, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $13, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $51, %k0, %k1 ; X32-NEXT: shrb $3, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; 
X32-NEXT: vpslld $24, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $12, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k4 +; X32-NEXT: kshiftrq $52, %k4, %k0 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $4, %dl ; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastd %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kxorq %k1, %k0, %k5 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $5, %dl ; X32-NEXT: andb $1, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $40, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k6 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $6, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k7 ; X32-NEXT: shrb $7, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $56, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vpblendvb %ymm6, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k0 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $24, %ecx ; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastq %xmm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; X32-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm2 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: andb $2, %dl ; X32-NEXT: shrb %dl -; X32-NEXT: kmovd %edx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vextracti64x4 $1, %zmm2, %ymm1 -; X32-NEXT: vpblendvb %ymm5, %ymm1, %ymm0, %ymm1 +; X32-NEXT: kmovd %edx, %k2 ; X32-NEXT: andb $15, %cl ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k0 +; X32-NEXT: kmovd %edx, %k3 +; X32-NEXT: kshiftlq $63, %k5, %k5 +; X32-NEXT: kshiftrq $11, %k5, %k5 +; X32-NEXT: kxorq %k4, %k5, %k4 +; X32-NEXT: kshiftrq $53, %k4, %k5 +; X32-NEXT: kxorq %k6, %k5, %k5 +; X32-NEXT: kshiftlq $63, %k5, %k5 +; X32-NEXT: kshiftrq $10, %k5, %k5 +; X32-NEXT: kxorq %k4, %k5, %k5 +; X32-NEXT: kshiftrq $54, %k5, %k4 +; X32-NEXT: kxorq %k7, %k4, %k6 ; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k1 +; X32-NEXT: kmovd %ecx, %k4 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $29, %ecx ; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k2 -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k3 -; X32-NEXT: vpmovm2b %k3, %zmm0 -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vpbroadcastw %xmm2, %xmm2 -; X32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm2, %ymm1 -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; X32-NEXT: vpmovm2b %k1, %zmm2 -; X32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4] -; X32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255] -; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm2, %ymm1 -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; X32-NEXT: kmovd %ecx, %k7 +; X32-NEXT: kshiftlq $63, %k6, %k6 +; X32-NEXT: kshiftrq $9, %k6, %k6 +; X32-NEXT: kxorq %k5, %k6, %k5 +; X32-NEXT: kshiftrq $55, %k5, %k6 +; X32-NEXT: kxorq %k0, %k6, %k0 +; X32-NEXT: kshiftlq $63, %k0, %k0 +; X32-NEXT: kshiftrq $8, %k0, %k0 +; X32-NEXT: kxorq %k5, %k0, %k0 +; X32-NEXT: kshiftrq $56, %k0, %k5 +; X32-NEXT: kxorq %k1, %k5, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $28, %ecx -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vpbroadcastd %xmm2, %xmm2 -; X32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] -; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm2, %ymm1 -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; X32-NEXT: vpmovm2b %k2, %zmm2 -; X32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2] -; X32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; X32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] -; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm2, %ymm1 +; X32-NEXT: kmovd %ecx, %k5 ; X32-NEXT: movl %eax, 
%ecx ; X32-NEXT: shrl $30, %ecx -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm2 -; X32-NEXT: vpbroadcastw %xmm2, %xmm2 -; X32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; X32-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; X32-NEXT: kmovd %ecx, %k6 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $7, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $57, %k0, %k1 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $6, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $58, %k0, %k1 +; X32-NEXT: kxorq %k3, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $5, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $59, %k0, %k1 +; X32-NEXT: kxorq %k4, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $4, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $60, %k0, %k1 +; X32-NEXT: kxorq %k5, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $3, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $61, %k0, %k1 +; X32-NEXT: kxorq %k7, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $2, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $62, %k0, %k1 +; X32-NEXT: kxorq %k6, %k1, %k1 ; X32-NEXT: shrl $31, %eax -; X32-NEXT: kmovd %eax, %k0 +; X32-NEXT: kmovd %eax, %k2 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al -; X32-NEXT: vpmovb2m %zmm0, %k1 -; X32-NEXT: kshiftlq $1, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $1, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k0, %k0 -; X32-NEXT: korq %k0, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftlq $1, %k0, %k0 +; X32-NEXT: kshiftrq $1, %k0, %k0 +; X32-NEXT: kshiftlq $63, %k2, %k1 +; X32-NEXT: korq %k1, %k0, %k1 ; X32-NEXT: vpbroadcastb %eax, %zmm0 {%k1} {z} +; X32-NEXT: popl %esi ; X32-NEXT: popl %ebx ; X32-NEXT: retl ; @@ -2057,719 +1708,541 @@ define i64 @test_mm512_mask_test_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> % ; X32-NEXT: movl %esp, %ebp ; X32-NEXT: .cfi_def_cfa_register %ebp ; X32-NEXT: pushl %ebx -; X32-NEXT: andl $-64, %esp -; X32-NEXT: subl $256, %esp # imm = 0x100 +; X32-NEXT: pushl %esi +; X32-NEXT: andl $-8, %esp +; X32-NEXT: subl $8, %esp +; X32-NEXT: .cfi_offset %esi, -16 ; X32-NEXT: .cfi_offset %ebx, -12 -; X32-NEXT: vmovaps %zmm1, {{[0-9]+}}(%esp) # 64-byte Spill -; X32-NEXT: vmovaps %zmm0, {{[0-9]+}}(%esp) # 64-byte Spill -; X32-NEXT: movl 8(%ebp), %eax -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andb $2, %cl -; X32-NEXT: shrb %cl +; X32-NEXT: movl 8(%ebp), %ecx ; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vpsllw $8, %xmm0, %xmm0 -; X32-NEXT: kmovd %eax, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %edx +; X32-NEXT: kshiftrq $1, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: andb $2, %al +; X32-NEXT: shrb %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq 
%k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $62, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $2, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: andb $15, %al +; X32-NEXT: movl %eax, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vpbroadcastw %xmm1, %xmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vpslld $24, %xmm1, %xmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $4, %cl -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vpbroadcastd %xmm1, %xmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $5, %cl -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vpsllq $40, %xmm1, %xmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $6, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $7, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $56, %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movb %ah, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastq %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb 
%ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: andb $2, %cl -; X32-NEXT: shrb %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movb %ah, %cl -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %edx +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $61, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $3, %k0, %k1 +; X32-NEXT: shrb $3, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $60, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $4, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrb $4, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $59, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $5, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrb $5, %al +; X32-NEXT: andb $1, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $58, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $6, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrb $6, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $57, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $7, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrb $7, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $56, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $8, %k0, %k1 +; X32-NEXT: movb %ch, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: andb $2, %al +; X32-NEXT: shrb %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: movb %ch, %al +; X32-NEXT: andb $15, %al +; X32-NEXT: movl %eax, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx 
-; X32-NEXT: shrl $12, %ecx -; X32-NEXT: andl $15, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastd %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $13, %ecx -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $14, %ecx -; X32-NEXT: andl $3, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $15, %ecx -; X32-NEXT: andl $1, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $16, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: andb $2, %dl -; X32-NEXT: shrb %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllw $8, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k3 +; X32-NEXT: shrb $3, %al +; X32-NEXT: kmovd %eax, %k4 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrl $13, %eax +; X32-NEXT: andb $1, %al +; X32-NEXT: kmovd %eax, %k5 +; 
X32-NEXT: movl %ecx, %edx +; X32-NEXT: shrl $16, %edx +; X32-NEXT: movl %edx, %eax +; X32-NEXT: andb $2, %al +; X32-NEXT: shrb %al +; X32-NEXT: kmovd %eax, %k6 +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: andb $15, %bl +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: shrb $2, %al +; X32-NEXT: kmovd %eax, %k7 +; X32-NEXT: movl 12(%ebp), %eax +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $55, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $9, %k0, %k1 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $54, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $10, %k0, %k1 +; X32-NEXT: kxorq %k3, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $53, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $11, %k0, %k1 +; X32-NEXT: kxorq %k4, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $52, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $12, %k0, %k1 +; X32-NEXT: movl %ecx, %esi +; X32-NEXT: shrl $12, %esi +; X32-NEXT: andl $15, %esi +; X32-NEXT: kmovd %esi, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $51, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $13, %k0, %k1 +; X32-NEXT: kxorq %k5, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $50, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $14, %k0, %k1 +; X32-NEXT: movl %ecx, %esi +; X32-NEXT: shrl $14, %esi +; X32-NEXT: andl $3, %esi +; X32-NEXT: kmovd %esi, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $49, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $15, %k0, %k1 +; X32-NEXT: movl %ecx, %esi +; X32-NEXT: shrl $15, %esi +; X32-NEXT: andl $1, %esi +; X32-NEXT: kmovd %esi, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $48, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $16, %k0, %k1 +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $47, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $17, %k0, %k1 +; X32-NEXT: kxorq %k6, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $46, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $18, %k0, %k1 +; X32-NEXT: kxorq %k7, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $45, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $19, %k0, %k1 +; X32-NEXT: shrb $3, %bl +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $44, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $20, %k0, %k1 +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: shrb $4, %bl +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $43, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $21, %k0, %k1 +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: shrb $5, %bl +; X32-NEXT: andb $1, %bl +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $42, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $22, %k0, %k1 +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: shrb $6, %bl +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $41, %k1, %k1 +; X32-NEXT: 
kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $23, %k0, %k1 +; X32-NEXT: shrb $7, %dl +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $40, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $24, %k0, %k1 ; X32-NEXT: movl %ecx, %edx +; X32-NEXT: shrl $24, %edx +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $39, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $25, %k0, %k1 +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: andb $2, %bl +; X32-NEXT: shrb %bl +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $38, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $26, %k0, %k1 ; X32-NEXT: andb $15, %dl ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: shrb $2, %bl -; X32-NEXT: kmovd %ebx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $37, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $27, %k0, %k1 ; X32-NEXT: shrb $3, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslld $24, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $36, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $28, %k0, %k1 ; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $4, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastd %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: shrl $28, %edx +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $35, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $29, %k0, %k1 ; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $5, %dl +; X32-NEXT: shrl $29, %edx ; X32-NEXT: andb $1, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $40, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $6, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: shrb $7, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $56, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $24, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastq %xmm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: andb $2, %dl -; X32-NEXT: shrb %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: andb $15, %cl +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $34, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $30, %k0, %k1 ; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4] -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b 
%k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255] -; X32-NEXT: vpblendvb %ymm5, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $28, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastd %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] -; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $29, %ecx -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2] -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] -; X32-NEXT: vpblendvb %ymm3, %ymm0, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $30, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] -; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: shrl $31, %eax -; X32-NEXT: kmovd %eax, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] -; X32-NEXT: vpblendvb %ymm7, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl 12(%ebp), %eax -; X32-NEXT: kmovd %eax, %k1 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; X32-NEXT: vpmovm2b %k1, %zmm7 -; X32-NEXT: vmovdqa {{.*#+}} ymm6 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm6, %ymm1, %ymm7, %ymm1 -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: shrl $30, %edx +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $33, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $31, %k0, %k1 +; X32-NEXT: shrl $31, %ecx +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq 
$32, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $32, %k0, %k1 +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $31, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $33, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: andb $2, %cl ; X32-NEXT: shrb %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllw $8, %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $30, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $34, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: andb $15, %cl ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $29, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $35, %k0, %k1 ; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslld $24, %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $28, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $36, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrb $4, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastd %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $27, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $37, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrb $5, %cl ; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $40, %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = 
[255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $26, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $38, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrb $6, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $25, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $39, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrb $7, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $56, %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $24, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $40, %k0, %k1 ; X32-NEXT: movb %ah, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastq %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: andb $2, %cl ; X32-NEXT: shrb %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 ; X32-NEXT: movb %ah, %cl ; X32-NEXT: andb $15, %cl ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $12, %ecx -; X32-NEXT: andl $15, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastd %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $13, %ecx -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $14, %ecx -; X32-NEXT: andl $3, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k3 +; X32-NEXT: shrb $3, %cl +; X32-NEXT: kmovd %ecx, %k4 ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $15, %ecx -; X32-NEXT: andl $1, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: shrl $13, %ecx +; X32-NEXT: andb $1, %cl +; X32-NEXT: kmovd %ecx, %k5 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $16, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: andb $2, %dl ; X32-NEXT: shrb %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllw $8, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k6 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: andb $15, %dl ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: shrb $2, %bl -; X32-NEXT: kmovd %ebx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ebx, %k7 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $23, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $41, %k0, %k1 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $22, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $42, %k0, %k1 +; X32-NEXT: kxorq %k3, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $21, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $43, %k0, %k1 +; X32-NEXT: kxorq %k4, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $20, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $44, %k0, %k1 +; X32-NEXT: movl %eax, %esi +; X32-NEXT: shrl $12, %esi +; X32-NEXT: andl $15, %esi +; X32-NEXT: kmovd %esi, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $19, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $45, %k0, %k1 +; X32-NEXT: kxorq %k5, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $18, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $46, %k0, %k1 +; X32-NEXT: movl %eax, %esi +; X32-NEXT: shrl $14, %esi +; X32-NEXT: andl $3, %esi +; X32-NEXT: kmovd %esi, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $17, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $47, %k0, %k1 +; X32-NEXT: movl %eax, %esi +; X32-NEXT: shrl $15, %esi +; X32-NEXT: andl $1, %esi +; X32-NEXT: kmovd %esi, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $16, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $48, %k0, %k1 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $15, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $49, %k0, %k1 +; X32-NEXT: kxorq %k6, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $14, %k1, %k1 
+; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $50, %k0, %k1 +; X32-NEXT: kxorq %k7, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $13, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $51, %k0, %k1 ; X32-NEXT: shrb $3, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslld $24, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $12, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $52, %k0, %k1 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $4, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastd %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $5, %dl ; X32-NEXT: andb $1, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $40, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k2 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $6, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k3 ; X32-NEXT: shrb $7, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $56, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k4 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $24, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: 
vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastq %xmm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k5 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: andb $2, %dl ; X32-NEXT: shrb %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; X32-NEXT: kmovd %edx, %k6 ; X32-NEXT: andb $15, %cl ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k0 -; X32-NEXT: vpmovb2m %zmm0, %k1 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 +; X32-NEXT: kmovd %edx, %k7 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $11, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $53, %k0, %k1 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $10, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $54, %k0, %k1 +; X32-NEXT: kxorq %k3, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $9, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $55, %k0, %k1 +; X32-NEXT: kxorq %k4, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $8, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $56, %k0, %k1 +; X32-NEXT: kxorq %k5, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $7, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $57, %k0, %k1 +; X32-NEXT: kxorq %k6, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $6, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $58, %k0, %k1 +; X32-NEXT: kxorq %k7, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $5, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $59, %k0, %k1 ; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4] -; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm6 -; X32-NEXT: vpblendvb %ymm5, %ymm6, %ymm1, %ymm1 -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, 
%k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $4, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $60, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $28, %ecx -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm5 -; X32-NEXT: vpbroadcastd %xmm5, %xmm5 -; X32-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm5 -; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm5, %ymm1 -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $3, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $61, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $29, %ecx ; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm4 -; X32-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2] -; X32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 -; X32-NEXT: vpblendvb %ymm3, %ymm1, %ymm4, %ymm1 -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $2, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $62, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $30, %ecx -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm3 -; X32-NEXT: vpbroadcastw %xmm3, %xmm3 -; X32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm3, %ymm1 -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $1, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftlq $1, %k0, %k0 ; X32-NEXT: kshiftrq $1, %k0, %k0 ; X32-NEXT: shrl $31, %eax ; X32-NEXT: kmovd %eax, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: korq %k1, %k0, %k1 -; X32-NEXT: vmovdqa64 {{[0-9]+}}(%esp), %zmm0 # 64-byte Reload -; X32-NEXT: vmovdqa64 {{[0-9]+}}(%esp), %zmm1 # 64-byte Reload -; X32-NEXT: vptestmb %zmm1, %zmm0, %k0 {%k1} -; X32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: vptestmb %zmm0, %zmm1, %k0 {%k1} +; X32-NEXT: kmovq %k0, (%esp) +; X32-NEXT: movl (%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: leal -4(%ebp), %esp +; X32-NEXT: leal -8(%ebp), %esp +; X32-NEXT: popl %esi ; X32-NEXT: popl %ebx ; X32-NEXT: popl %ebp ; X32-NEXT: vzeroupper @@ -2882,719 +2355,541 @@ define i64 @test_mm512_mask_testn_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> ; X32-NEXT: movl %esp, %ebp ; X32-NEXT: .cfi_def_cfa_register %ebp ; X32-NEXT: pushl %ebx -; X32-NEXT: andl $-64, %esp -; X32-NEXT: subl $256, %esp # imm = 0x100 +; X32-NEXT: pushl %esi +; X32-NEXT: andl $-8, %esp +; X32-NEXT: subl $8, %esp +; X32-NEXT: .cfi_offset %esi, -16 ; X32-NEXT: .cfi_offset %ebx, -12 -; X32-NEXT: vmovaps %zmm1, {{[0-9]+}}(%esp) # 64-byte Spill -; X32-NEXT: vmovaps %zmm0, {{[0-9]+}}(%esp) # 64-byte Spill -; X32-NEXT: movl 8(%ebp), %eax -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andb $2, %cl -; X32-NEXT: shrb %cl +; X32-NEXT: movl 8(%ebp), %ecx ; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vpsllw $8, %xmm0, %xmm0 -; X32-NEXT: kmovd %eax, %k0 -; X32-NEXT: 
vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %edx +; X32-NEXT: kshiftrq $1, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: andb $2, %al +; X32-NEXT: shrb %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $62, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $2, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: andb $15, %al +; X32-NEXT: movl %eax, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vpbroadcastw %xmm1, %xmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vpslld $24, %xmm1, %xmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $4, %cl -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vpbroadcastd %xmm1, %xmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $5, %cl -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vpsllq $40, %xmm1, %xmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $6, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrb $7, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $56, %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = 
[255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movb %ah, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastq %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: andb $2, %cl -; X32-NEXT: shrb %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movb %ah, %cl -; X32-NEXT: andb $15, %cl -; X32-NEXT: movl %ecx, %edx +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $61, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $3, %k0, %k1 +; X32-NEXT: shrb $3, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $60, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $4, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrb $4, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $59, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $5, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrb $5, %al +; X32-NEXT: andb $1, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $58, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $6, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrb $6, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $57, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $7, %k0, %k1 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrb $7, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $56, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $8, %k0, %k1 +; X32-NEXT: movb %ch, %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: andb $2, %al +; X32-NEXT: shrb %al +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: movb %ch, %al +; X32-NEXT: andb $15, %al +; X32-NEXT: movl %eax, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = 
zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $12, %ecx -; X32-NEXT: andl $15, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastd %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $13, %ecx -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $14, %ecx -; X32-NEXT: andl $3, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $15, %ecx -; X32-NEXT: andl $1, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $16, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: andb $2, %dl -; X32-NEXT: shrb %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 
-; X32-NEXT: vpsllw $8, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k3 +; X32-NEXT: shrb $3, %al +; X32-NEXT: kmovd %eax, %k4 +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: shrl $13, %eax +; X32-NEXT: andb $1, %al +; X32-NEXT: kmovd %eax, %k5 +; X32-NEXT: movl %ecx, %edx +; X32-NEXT: shrl $16, %edx +; X32-NEXT: movl %edx, %eax +; X32-NEXT: andb $2, %al +; X32-NEXT: shrb %al +; X32-NEXT: kmovd %eax, %k6 +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: andb $15, %bl +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: shrb $2, %al +; X32-NEXT: kmovd %eax, %k7 +; X32-NEXT: movl 12(%ebp), %eax +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $55, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $9, %k0, %k1 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $54, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $10, %k0, %k1 +; X32-NEXT: kxorq %k3, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $53, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $11, %k0, %k1 +; X32-NEXT: kxorq %k4, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $52, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $12, %k0, %k1 +; X32-NEXT: movl %ecx, %esi +; X32-NEXT: shrl $12, %esi +; X32-NEXT: andl $15, %esi +; X32-NEXT: kmovd %esi, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $51, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $13, %k0, %k1 +; X32-NEXT: kxorq %k5, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $50, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $14, %k0, %k1 +; X32-NEXT: movl %ecx, %esi +; X32-NEXT: shrl $14, %esi +; X32-NEXT: andl $3, %esi +; X32-NEXT: kmovd %esi, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $49, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $15, %k0, %k1 +; X32-NEXT: movl %ecx, %esi +; X32-NEXT: shrl $15, %esi +; X32-NEXT: andl $1, %esi +; X32-NEXT: kmovd %esi, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $48, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $16, %k0, %k1 +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $47, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $17, %k0, %k1 +; X32-NEXT: kxorq %k6, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $46, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $18, %k0, %k1 +; X32-NEXT: kxorq %k7, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $45, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $19, %k0, %k1 +; X32-NEXT: shrb $3, %bl +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $44, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $20, %k0, %k1 +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: shrb $4, %bl +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, 
%k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $43, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $21, %k0, %k1 +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: shrb $5, %bl +; X32-NEXT: andb $1, %bl +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $42, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $22, %k0, %k1 +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: shrb $6, %bl +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $41, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $23, %k0, %k1 +; X32-NEXT: shrb $7, %dl +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $40, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $24, %k0, %k1 ; X32-NEXT: movl %ecx, %edx +; X32-NEXT: shrl $24, %edx +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $39, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $25, %k0, %k1 +; X32-NEXT: movl %edx, %ebx +; X32-NEXT: andb $2, %bl +; X32-NEXT: shrb %bl +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $38, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $26, %k0, %k1 ; X32-NEXT: andb $15, %dl ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: shrb $2, %bl -; X32-NEXT: kmovd %ebx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ebx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $37, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $27, %k0, %k1 ; X32-NEXT: shrb $3, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslld $24, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $36, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $28, %k0, %k1 ; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $4, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastd %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: shrl $28, %edx +; 
X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $35, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $29, %k0, %k1 ; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $5, %dl +; X32-NEXT: shrl $29, %edx ; X32-NEXT: andb $1, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $40, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $6, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: shrb $7, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $56, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $24, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastq %xmm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %ecx, %edx -; X32-NEXT: andb $2, %dl -; X32-NEXT: shrb %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: andb $15, %cl +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $34, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $30, %k0, %k1 ; X32-NEXT: movl %ecx, %edx -; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4] -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255] -; X32-NEXT: vpblendvb %ymm5, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $28, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastd %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] -; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $29, %ecx -; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2] -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] -; X32-NEXT: vpblendvb %ymm3, %ymm0, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $30, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] -; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: shrl $31, %eax -; X32-NEXT: kmovd %eax, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] -; X32-NEXT: vpblendvb %ymm7, %ymm1, %ymm0, %ymm0 -; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl 12(%ebp), %eax -; X32-NEXT: kmovd %eax, %k1 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; X32-NEXT: vpmovm2b %k1, %zmm7 -; X32-NEXT: vmovdqa {{.*#+}} ymm6 = 
[0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm6, %ymm1, %ymm7, %ymm1 -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: shrl $30, %edx +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $33, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $31, %k0, %k1 +; X32-NEXT: shrl $31, %ecx +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $32, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $32, %k0, %k1 +; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $31, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $33, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: andb $2, %cl ; X32-NEXT: shrb %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllw $8, %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $30, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $34, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: andb $15, %cl ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $29, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $35, %k0, %k1 ; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslld $24, %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $28, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $36, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrb $4, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastd %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = 
[255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $27, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $37, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrb $5, %cl ; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $40, %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $26, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $38, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrb $6, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $25, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $39, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrb $7, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $56, %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $24, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $40, %k0, %k1 ; X32-NEXT: movb %ah, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastq %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: andb $2, %cl ; X32-NEXT: shrb %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa 
{{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 ; X32-NEXT: movb %ah, %cl ; X32-NEXT: andb $15, %cl ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k3 ; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $12, %ecx -; X32-NEXT: andl $15, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastd %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k4 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $13, %ecx ; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2] -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $14, %ecx -; X32-NEXT: andl $3, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $15, %ecx -; X32-NEXT: andl $1, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] -; X32-NEXT: vpmovm2b 
%k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k5 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $16, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: andb $2, %dl ; X32-NEXT: shrb %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllw $8, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k6 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: andb $15, %dl ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: shrb $2, %bl -; X32-NEXT: kmovd %ebx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ebx, %k7 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $23, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $41, %k0, %k1 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $22, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $42, %k0, %k1 +; X32-NEXT: kxorq %k3, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $21, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $43, %k0, %k1 +; X32-NEXT: kxorq %k4, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $20, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $44, %k0, %k1 +; X32-NEXT: movl %eax, %esi +; X32-NEXT: shrl $12, %esi +; X32-NEXT: andl $15, %esi +; X32-NEXT: kmovd %esi, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $19, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $45, %k0, %k1 +; X32-NEXT: kxorq %k5, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $18, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $46, %k0, %k1 +; X32-NEXT: movl %eax, %esi +; X32-NEXT: shrl $14, %esi +; X32-NEXT: andl $3, %esi +; X32-NEXT: kmovd %esi, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $17, %k1, %k1 
+; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $47, %k0, %k1 +; X32-NEXT: movl %eax, %esi +; X32-NEXT: shrl $15, %esi +; X32-NEXT: andl $1, %esi +; X32-NEXT: kmovd %esi, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $16, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $48, %k0, %k1 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $15, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $49, %k0, %k1 +; X32-NEXT: kxorq %k6, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $14, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $50, %k0, %k1 +; X32-NEXT: kxorq %k7, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $13, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $51, %k0, %k1 ; X32-NEXT: shrb $3, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslld $24, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $12, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $52, %k0, %k1 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $4, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastd %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $5, %dl ; X32-NEXT: andb $1, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $40, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k2 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $6, %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %edx, %k3 ; X32-NEXT: shrb 
$7, %cl -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpsllq $56, %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k4 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $24, %ecx -; X32-NEXT: kmovd %ecx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpbroadcastq %xmm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k5 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: andb $2, %dl ; X32-NEXT: shrb %dl -; X32-NEXT: kmovd %edx, %k1 -; X32-NEXT: vpmovm2b %k1, %zmm0 -; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; X32-NEXT: kmovd %edx, %k6 ; X32-NEXT: andb $15, %cl ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k0 -; X32-NEXT: vpmovb2m %zmm0, %k1 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vpbroadcastw %xmm0, %xmm0 -; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-NEXT: vpmovm2b %k1, %zmm1 -; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6 -; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] -; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 +; X32-NEXT: kmovd %edx, %k7 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $11, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $53, %k0, %k1 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $10, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $54, %k0, %k1 +; X32-NEXT: kxorq %k3, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $9, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $55, %k0, %k1 +; X32-NEXT: kxorq %k4, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $8, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $56, %k0, %k1 +; X32-NEXT: kxorq %k5, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $7, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $57, %k0, %k1 +; X32-NEXT: kxorq %k6, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $6, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $58, %k0, %k1 +; X32-NEXT: kxorq %k7, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 
+; X32-NEXT: kshiftrq $5, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $59, %k0, %k1 ; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm1 -; X32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4] -; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm6 -; X32-NEXT: vpblendvb %ymm5, %ymm6, %ymm1, %ymm1 -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $4, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $60, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $28, %ecx -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm5 -; X32-NEXT: vpbroadcastd %xmm5, %xmm5 -; X32-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm5 -; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm5, %ymm1 -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $3, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $61, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $29, %ecx ; X32-NEXT: andb $1, %cl -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm4 -; X32-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2] -; X32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 -; X32-NEXT: vpblendvb %ymm3, %ymm1, %ymm4, %ymm1 -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm0 -; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $2, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $62, %k0, %k1 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $30, %ecx -; X32-NEXT: kmovd %ecx, %k0 -; X32-NEXT: vpmovm2b %k0, %zmm3 -; X32-NEXT: vpbroadcastw %xmm3, %xmm3 -; X32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 -; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm3, %ymm1 -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; X32-NEXT: vpmovb2m %zmm0, %k0 +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $1, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftlq $1, %k0, %k0 ; X32-NEXT: kshiftrq $1, %k0, %k0 ; X32-NEXT: shrl $31, %eax ; X32-NEXT: kmovd %eax, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: korq %k1, %k0, %k1 -; X32-NEXT: vmovdqa64 {{[0-9]+}}(%esp), %zmm0 # 64-byte Reload -; X32-NEXT: vmovdqa64 {{[0-9]+}}(%esp), %zmm1 # 64-byte Reload -; X32-NEXT: vptestnmb %zmm1, %zmm0, %k0 {%k1} -; X32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: vptestnmb %zmm0, %zmm1, %k0 {%k1} +; X32-NEXT: kmovq %k0, (%esp) +; X32-NEXT: movl (%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: leal -4(%ebp), %esp +; X32-NEXT: leal -8(%ebp), %esp +; X32-NEXT: popl %esi ; X32-NEXT: popl %ebx ; X32-NEXT: popl %ebp ; X32-NEXT: vzeroupper diff --git a/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll index 57271593bd0..f19e09758f1 100644 --- 
a/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll +++ b/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll @@ -1795,753 +1795,574 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: .cfi_def_cfa_offset 8 ; AVX512F-32-NEXT: pushl %esi ; AVX512F-32-NEXT: .cfi_def_cfa_offset 12 -; AVX512F-32-NEXT: subl $60, %esp -; AVX512F-32-NEXT: .cfi_def_cfa_offset 72 +; AVX512F-32-NEXT: subl $68, %esp +; AVX512F-32-NEXT: .cfi_def_cfa_offset 80 ; AVX512F-32-NEXT: .cfi_offset %esi, -12 ; AVX512F-32-NEXT: .cfi_offset %ebx, -8 -; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm6 -; AVX512F-32-NEXT: vmovdqa64 %zmm0, %zmm5 -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrb $5, %al -; AVX512F-32-NEXT: andb $1, %al -; AVX512F-32-NEXT: movl %ecx, %ebx -; AVX512F-32-NEXT: andb $15, %bl -; AVX512F-32-NEXT: movl %ecx, %edx -; AVX512F-32-NEXT: andb $2, %dl -; AVX512F-32-NEXT: shrb %dl -; AVX512F-32-NEXT: kmovd %edx, %k0 +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; AVX512F-32-NEXT: movl %ebx, %eax +; AVX512F-32-NEXT: shrl $16, %eax ; AVX512F-32-NEXT: movl %ebx, %edx -; AVX512F-32-NEXT: shrb $2, %bl -; AVX512F-32-NEXT: kmovd %ebx, %k1 -; AVX512F-32-NEXT: movl %ecx, %ebx -; AVX512F-32-NEXT: shrb $4, %bl -; AVX512F-32-NEXT: shrb $3, %dl -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2 -; AVX512F-32-NEXT: kmovd %ecx, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 -; AVX512F-32-NEXT: vpbroadcastw %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; AVX512F-32-NEXT: kmovd %edx, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vpslld $24, %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; AVX512F-32-NEXT: kmovd %ebx, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vpbroadcastd %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vpsllq $40, %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = 
[255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrb $6, %al -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrb $7, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movb %ch, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastq %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: andb $2, %al -; AVX512F-32-NEXT: shrb %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movb %ch, %dl ; AVX512F-32-NEXT: andb $15, %dl -; AVX512F-32-NEXT: movl %edx, %eax +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: andb $2, %cl +; AVX512F-32-NEXT: shrb %cl +; AVX512F-32-NEXT: kmovd %ecx, %k1 +; AVX512F-32-NEXT: movl %edx, %ecx ; AVX512F-32-NEXT: shrb $2, %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: shrb $3, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; 
AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $12, %eax -; AVX512F-32-NEXT: andl $15, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $13, %eax -; AVX512F-32-NEXT: andb $1, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $14, %eax -; AVX512F-32-NEXT: andl $3, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $15, %eax -; AVX512F-32-NEXT: andl $1, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $16, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, 
%ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: kmovd %edx, %k2 +; AVX512F-32-NEXT: movb %bh, %dl +; AVX512F-32-NEXT: andb $15, %dl +; AVX512F-32-NEXT: shrb $3, %cl +; AVX512F-32-NEXT: kmovd %ecx, %k0 +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrb $4, %cl +; AVX512F-32-NEXT: kmovd %ecx, %k3 +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrb $5, %cl +; AVX512F-32-NEXT: andb $1, %cl +; AVX512F-32-NEXT: kmovd %ecx, %k4 +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrb $6, %cl +; AVX512F-32-NEXT: kmovd %ecx, %k6 +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrb $7, %cl +; AVX512F-32-NEXT: kmovd %ebx, %k5 +; AVX512F-32-NEXT: kshiftrq $1, %k5, %k7 +; AVX512F-32-NEXT: kxorq %k1, %k7, %k1 +; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $62, %k1, %k1 +; AVX512F-32-NEXT: kxorq %k5, %k1, %k7 +; AVX512F-32-NEXT: kshiftrq $2, %k7, %k1 +; AVX512F-32-NEXT: kxorq %k2, %k1, %k2 +; AVX512F-32-NEXT: kmovd %ecx, %k5 +; AVX512F-32-NEXT: movb %bh, %cl +; AVX512F-32-NEXT: kmovd %ecx, %k1 +; AVX512F-32-NEXT: movl %ebx, %esi +; AVX512F-32-NEXT: andb $2, %cl +; AVX512F-32-NEXT: shrb %cl +; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 +; AVX512F-32-NEXT: kshiftrq $61, %k2, %k2 +; AVX512F-32-NEXT: kxorq %k7, %k2, %k7 +; AVX512F-32-NEXT: kshiftrq $3, %k7, %k2 +; AVX512F-32-NEXT: kxorq %k0, %k2, %k0 +; AVX512F-32-NEXT: kmovd %ecx, %k2 +; AVX512F-32-NEXT: movl %edx, %ecx +; AVX512F-32-NEXT: shrb $2, %dl +; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 +; AVX512F-32-NEXT: kshiftrq $60, %k0, %k0 +; AVX512F-32-NEXT: kxorq %k7, %k0, %k0 +; AVX512F-32-NEXT: kshiftrq $4, %k0, %k7 +; AVX512F-32-NEXT: kxorq %k3, %k7, %k7 +; AVX512F-32-NEXT: kmovd %edx, %k3 ; AVX512F-32-NEXT: movl %eax, %edx -; AVX512F-32-NEXT: andb $2, %dl -; AVX512F-32-NEXT: shrb %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: andb $15, %dl +; AVX512F-32-NEXT: shrb $3, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $59, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k0, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $5, %k7, %k0 +; AVX512F-32-NEXT: kxorq %k4, %k0, %k4 +; AVX512F-32-NEXT: kmovd %ecx, %k0 +; AVX512F-32-NEXT: movl %esi, %ecx +; AVX512F-32-NEXT: shrl $13, %ecx +; AVX512F-32-NEXT: andb $1, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 +; AVX512F-32-NEXT: kshiftrq $58, %k4, %k4 +; AVX512F-32-NEXT: kxorq %k7, %k4, %k7 +; AVX512F-32-NEXT: kshiftrq $6, %k7, %k4 +; AVX512F-32-NEXT: kxorq %k6, %k4, %k6 +; AVX512F-32-NEXT: kmovd %ecx, %k4 ; AVX512F-32-NEXT: movl %eax, %ebx -; AVX512F-32-NEXT: andb $15, %bl -; AVX512F-32-NEXT: movl %ebx, %edx -; AVX512F-32-NEXT: shrb $2, %bl -; AVX512F-32-NEXT: kmovd %ebx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: shrb $3, %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslld $24, %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %eax, %edx -; AVX512F-32-NEXT: shrb $4, %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %eax, %edx -; AVX512F-32-NEXT: shrb $5, %dl -; AVX512F-32-NEXT: andb $1, %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: andb $2, %bl +; AVX512F-32-NEXT: shrb %bl +; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 +; AVX512F-32-NEXT: kshiftrq $57, %k6, %k6 +; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 +; AVX512F-32-NEXT: kshiftrq $7, %k6, %k7 +; AVX512F-32-NEXT: kxorq %k5, %k7, %k7 +; AVX512F-32-NEXT: kmovd %ebx, %k5 +; AVX512F-32-NEXT: movl %edx, %ecx +; AVX512F-32-NEXT: shrb $2, %dl +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $56, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k6, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $8, %k7, %k6 +; AVX512F-32-NEXT: kxorq %k1, %k6, %k1 +; AVX512F-32-NEXT: kmovd %edx, %k6 +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; AVX512F-32-NEXT: shrb $3, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $55, %k1, %k1 +; AVX512F-32-NEXT: kxorq %k7, %k1, %k7 +; AVX512F-32-NEXT: kshiftrq $9, %k7, %k1 +; AVX512F-32-NEXT: kxorq %k2, %k1, %k2 +; AVX512F-32-NEXT: kmovd %ecx, %k1 +; AVX512F-32-NEXT: movl %eax, %ecx +; AVX512F-32-NEXT: shrb $4, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 +; AVX512F-32-NEXT: kshiftrq $54, %k2, %k2 +; AVX512F-32-NEXT: kxorq %k7, %k2, %k7 +; AVX512F-32-NEXT: kshiftrq $10, %k7, %k2 +; AVX512F-32-NEXT: kxorq %k3, %k2, %k3 +; AVX512F-32-NEXT: kmovd %ecx, %k2 +; AVX512F-32-NEXT: movl %eax, %ecx +; AVX512F-32-NEXT: shrb $5, %cl +; AVX512F-32-NEXT: andb $1, %cl +; 
AVX512F-32-NEXT: movl %esi, %edx +; AVX512F-32-NEXT: shrl $12, %edx +; AVX512F-32-NEXT: andl $15, %edx +; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3 +; AVX512F-32-NEXT: kshiftrq $53, %k3, %k3 +; AVX512F-32-NEXT: kxorq %k7, %k3, %k3 +; AVX512F-32-NEXT: kshiftrq $11, %k3, %k7 +; AVX512F-32-NEXT: kxorq %k0, %k7, %k0 +; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 +; AVX512F-32-NEXT: kshiftrq $52, %k0, %k0 +; AVX512F-32-NEXT: kxorq %k3, %k0, %k3 +; AVX512F-32-NEXT: kshiftrq $12, %k3, %k0 +; AVX512F-32-NEXT: kmovd %edx, %k7 +; AVX512F-32-NEXT: kxorq %k7, %k0, %k7 +; AVX512F-32-NEXT: kmovd %ecx, %k0 ; AVX512F-32-NEXT: movl %eax, %edx ; AVX512F-32-NEXT: shrb $6, %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: movl %esi, %ecx +; AVX512F-32-NEXT: shrl $14, %ecx +; AVX512F-32-NEXT: andl $3, %ecx +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $51, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k3, %k7, %k3 +; AVX512F-32-NEXT: kshiftrq $13, %k3, %k7 +; AVX512F-32-NEXT: kxorq %k4, %k7, %k4 +; AVX512F-32-NEXT: kmovd %ecx, %k7 +; AVX512F-32-NEXT: movl %esi, %ecx +; AVX512F-32-NEXT: shrl $15, %ecx +; AVX512F-32-NEXT: andl $1, %ecx +; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 +; AVX512F-32-NEXT: kshiftrq $50, %k4, %k4 +; AVX512F-32-NEXT: kxorq %k3, %k4, %k3 +; AVX512F-32-NEXT: kshiftrq $14, %k3, %k4 +; AVX512F-32-NEXT: kxorq %k7, %k4, %k4 +; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 +; AVX512F-32-NEXT: kshiftrq $49, %k4, %k4 +; AVX512F-32-NEXT: kxorq %k3, %k4, %k3 +; AVX512F-32-NEXT: kshiftrq $15, %k3, %k4 +; AVX512F-32-NEXT: kmovd %ecx, %k7 +; AVX512F-32-NEXT: kxorq %k7, %k4, %k4 +; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 +; AVX512F-32-NEXT: kshiftrq $48, %k4, %k4 +; AVX512F-32-NEXT: kxorq %k3, %k4, %k3 +; AVX512F-32-NEXT: kshiftrq $16, %k3, %k4 +; AVX512F-32-NEXT: kmovd %eax, %k7 +; AVX512F-32-NEXT: kxorq %k7, %k4, %k4 +; AVX512F-32-NEXT: kmovd %edx, %k7 +; AVX512F-32-NEXT: movl %esi, %edx +; AVX512F-32-NEXT: shrl $24, %edx ; AVX512F-32-NEXT: # kill: def %al killed %al killed %eax def %eax ; AVX512F-32-NEXT: shrb $7, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $24, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastq %xmm2, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] -; 
AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %eax, %edx -; AVX512F-32-NEXT: andb $2, %dl -; AVX512F-32-NEXT: shrb %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6] -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %eax, %edx -; AVX512F-32-NEXT: andb $15, %dl +; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 +; AVX512F-32-NEXT: kshiftrq $47, %k4, %k4 +; AVX512F-32-NEXT: kxorq %k3, %k4, %k3 +; AVX512F-32-NEXT: kshiftrq $17, %k3, %k4 +; AVX512F-32-NEXT: kxorq %k5, %k4, %k4 +; AVX512F-32-NEXT: kmovd %eax, %k5 ; AVX512F-32-NEXT: movl %edx, %eax -; AVX512F-32-NEXT: shrb $2, %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: shrb $3, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4] -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm3, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm4[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $28, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm4[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: movl %ecx, %esi -; AVX512F-32-NEXT: shrl $29, %eax -; AVX512F-32-NEXT: andb $1, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2] -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: 
vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm2[0,1,2,3],zmm0[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %esi, %eax -; AVX512F-32-NEXT: shrl $30, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] -; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %esi, %eax -; AVX512F-32-NEXT: shrl $31, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm1, %ymm0, %ymm0 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; AVX512F-32-NEXT: kmovd %ecx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm7 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm7, %ymm1 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: andb $2, %al -; AVX512F-32-NEXT: shrb %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllw $8, %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %edx +; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 +; AVX512F-32-NEXT: kshiftrq $46, %k4, %k4 +; AVX512F-32-NEXT: kxorq %k3, %k4, %k4 +; AVX512F-32-NEXT: kshiftrq $18, %k4, %k3 +; AVX512F-32-NEXT: kxorq %k6, %k3, %k6 +; AVX512F-32-NEXT: kmovd %edx, %k3 +; AVX512F-32-NEXT: # kill: def %dl killed %dl killed %edx def %edx ; AVX512F-32-NEXT: andb $15, %dl -; AVX512F-32-NEXT: movl %edx, %eax -; AVX512F-32-NEXT: shrb $2, %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; 
AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: shrb $3, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslld $24, %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrb $4, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastd %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrb $5, %al -; AVX512F-32-NEXT: andb $1, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllq $40, %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrb $6, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrb $7, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllq $56, %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movb %ch, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastq %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: andb $2, %al ; AVX512F-32-NEXT: shrb %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movb %ch, %dl -; AVX512F-32-NEXT: andb $15, %dl -; AVX512F-32-NEXT: movl %edx, %eax +; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 +; AVX512F-32-NEXT: kshiftrq $45, %k6, %k6 +; AVX512F-32-NEXT: kxorq %k4, %k6, %k6 +; AVX512F-32-NEXT: kshiftrq $19, %k6, %k4 +; AVX512F-32-NEXT: kxorq %k1, %k4, %k1 +; AVX512F-32-NEXT: kmovd %eax, %k4 +; AVX512F-32-NEXT: movl %edx, %ecx ; AVX512F-32-NEXT: shrb $2, %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: shrb $3, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $12, %eax -; AVX512F-32-NEXT: andl $15, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastd %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $13, %eax -; AVX512F-32-NEXT: andb $1, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $14, %eax -; AVX512F-32-NEXT: andl $3, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $15, %eax -; AVX512F-32-NEXT: andl $1, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %ebx -; AVX512F-32-NEXT: shrl $16, %ebx -; AVX512F-32-NEXT: kmovd %ebx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $44, %k1, %k1 +; AVX512F-32-NEXT: kxorq %k6, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $20, %k1, %k6 +; AVX512F-32-NEXT: kxorq %k2, %k6, %k6 +; AVX512F-32-NEXT: kmovd %edx, %k2 +; AVX512F-32-NEXT: movl %ebx, %eax +; AVX512F-32-NEXT: andb $15, %al +; AVX512F-32-NEXT: shrb $3, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 +; AVX512F-32-NEXT: kshiftrq $43, %k6, %k6 +; AVX512F-32-NEXT: kxorq %k1, %k6, %k1 +; AVX512F-32-NEXT: kshiftrq $21, %k1, %k6 +; AVX512F-32-NEXT: kxorq %k0, %k6, %k6 +; AVX512F-32-NEXT: kmovd %ecx, %k0 +; AVX512F-32-NEXT: movl %esi, %ecx +; AVX512F-32-NEXT: shrl $29, %ecx +; AVX512F-32-NEXT: andb $1, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 +; AVX512F-32-NEXT: kshiftrq $42, %k6, %k6 +; AVX512F-32-NEXT: kxorq %k1, %k6, %k6 +; AVX512F-32-NEXT: kshiftrq $22, %k6, %k1 +; AVX512F-32-NEXT: kxorq %k7, %k1, %k7 +; AVX512F-32-NEXT: kmovd %ecx, %k1 ; AVX512F-32-NEXT: movl %ebx, %edx ; AVX512F-32-NEXT: andb $2, %dl ; AVX512F-32-NEXT: shrb %dl -; AVX512F-32-NEXT: kmovd %edx, 
%k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllw $8, %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: movl %ebx, %eax +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $41, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k6, %k7, %k6 +; AVX512F-32-NEXT: kshiftrq $23, %k6, %k7 +; AVX512F-32-NEXT: kxorq %k5, %k7, %k7 +; AVX512F-32-NEXT: kmovd %edx, %k5 +; AVX512F-32-NEXT: movl %eax, %ecx +; AVX512F-32-NEXT: shrb $2, %al +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $40, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k6, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $24, %k7, %k6 +; AVX512F-32-NEXT: kxorq %k3, %k6, %k3 +; AVX512F-32-NEXT: kmovd %eax, %k6 +; AVX512F-32-NEXT: movb %bh, %al ; AVX512F-32-NEXT: andb $15, %al -; AVX512F-32-NEXT: movl %eax, %edx +; AVX512F-32-NEXT: shrb $3, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3 +; AVX512F-32-NEXT: kshiftrq $39, %k3, %k3 +; AVX512F-32-NEXT: kxorq %k7, %k3, %k7 +; AVX512F-32-NEXT: kshiftrq $25, %k7, %k3 +; AVX512F-32-NEXT: kxorq %k4, %k3, %k4 +; AVX512F-32-NEXT: kmovd %ecx, %k3 +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrb $4, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 +; AVX512F-32-NEXT: kshiftrq $38, %k4, %k4 +; AVX512F-32-NEXT: kxorq %k7, %k4, %k7 +; AVX512F-32-NEXT: kshiftrq $26, %k7, %k4 +; AVX512F-32-NEXT: kxorq %k2, %k4, %k2 +; AVX512F-32-NEXT: kmovd %ecx, %k4 +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrb $5, %cl +; AVX512F-32-NEXT: andb $1, %cl +; AVX512F-32-NEXT: movl %esi, %edx +; AVX512F-32-NEXT: shrl $28, %edx +; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 +; AVX512F-32-NEXT: kshiftrq $37, %k2, %k2 +; AVX512F-32-NEXT: kxorq %k7, %k2, %k2 +; AVX512F-32-NEXT: kshiftrq $27, %k2, %k7 +; AVX512F-32-NEXT: kxorq %k0, %k7, %k0 +; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 +; AVX512F-32-NEXT: kshiftrq $36, %k0, %k0 +; AVX512F-32-NEXT: kxorq %k2, %k0, %k2 +; AVX512F-32-NEXT: kshiftrq $28, %k2, %k0 +; AVX512F-32-NEXT: kmovd %edx, %k7 +; AVX512F-32-NEXT: kxorq %k7, %k0, %k7 +; AVX512F-32-NEXT: kmovd %ecx, %k0 +; AVX512F-32-NEXT: movl %ebx, %edx +; AVX512F-32-NEXT: shrb $6, %dl +; AVX512F-32-NEXT: movl %esi, %ecx +; AVX512F-32-NEXT: shrl $30, %ecx +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $35, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k2, %k7, %k2 +; AVX512F-32-NEXT: kshiftrq $29, %k2, %k7 +; AVX512F-32-NEXT: kxorq %k1, %k7, %k1 +; AVX512F-32-NEXT: kmovd %ecx, %k7 +; AVX512F-32-NEXT: movl %esi, %ecx +; AVX512F-32-NEXT: shrl $31, %ecx +; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $34, %k1, %k1 +; AVX512F-32-NEXT: kxorq %k2, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $30, %k1, %k2 +; AVX512F-32-NEXT: kxorq %k7, %k2, %k2 +; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 +; AVX512F-32-NEXT: kshiftrq $33, %k2, %k2 +; AVX512F-32-NEXT: kxorq %k1, %k2, %k1 +; AVX512F-32-NEXT: kshiftrq $31, %k1, %k2 +; AVX512F-32-NEXT: kmovd %ecx, %k7 +; AVX512F-32-NEXT: kxorq %k7, %k2, %k2 +; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 +; AVX512F-32-NEXT: kshiftrq $32, %k2, %k2 +; AVX512F-32-NEXT: kxorq %k1, %k2, %k1 +; AVX512F-32-NEXT: kshiftrq $32, %k1, %k2 
+; AVX512F-32-NEXT: kmovd %ebx, %k7 +; AVX512F-32-NEXT: kxorq %k7, %k2, %k2 +; AVX512F-32-NEXT: kmovd %edx, %k7 +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrb $7, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 +; AVX512F-32-NEXT: kshiftrq $31, %k2, %k2 +; AVX512F-32-NEXT: kxorq %k1, %k2, %k1 +; AVX512F-32-NEXT: kshiftrq $33, %k1, %k2 +; AVX512F-32-NEXT: kxorq %k5, %k2, %k2 +; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 +; AVX512F-32-NEXT: kshiftrq $30, %k2, %k2 +; AVX512F-32-NEXT: kxorq %k1, %k2, %k2 +; AVX512F-32-NEXT: kshiftrq $34, %k2, %k1 +; AVX512F-32-NEXT: kxorq %k6, %k1, %k5 +; AVX512F-32-NEXT: kmovd %ecx, %k6 +; AVX512F-32-NEXT: movb %bh, %cl +; AVX512F-32-NEXT: kmovd %ecx, %k1 +; AVX512F-32-NEXT: andb $2, %cl +; AVX512F-32-NEXT: shrb %cl +; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5 +; AVX512F-32-NEXT: kshiftrq $29, %k5, %k5 +; AVX512F-32-NEXT: kxorq %k2, %k5, %k5 +; AVX512F-32-NEXT: kshiftrq $35, %k5, %k2 +; AVX512F-32-NEXT: kxorq %k3, %k2, %k3 +; AVX512F-32-NEXT: kmovd %ecx, %k2 +; AVX512F-32-NEXT: movl %eax, %ecx ; AVX512F-32-NEXT: shrb $2, %al -; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: shrb $3, %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslld $24, %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ebx, %eax -; AVX512F-32-NEXT: shrb $4, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastd %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ebx, %eax -; AVX512F-32-NEXT: shrb $5, %al -; AVX512F-32-NEXT: andb $1, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllq $40, %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] -; 
AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3 +; AVX512F-32-NEXT: kshiftrq $28, %k3, %k3 +; AVX512F-32-NEXT: kxorq %k5, %k3, %k5 +; AVX512F-32-NEXT: kshiftrq $36, %k5, %k3 +; AVX512F-32-NEXT: kxorq %k4, %k3, %k4 +; AVX512F-32-NEXT: kmovd %eax, %k3 ; AVX512F-32-NEXT: movl %ebx, %eax -; AVX512F-32-NEXT: shrb $6, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: # kill: def %bl killed %bl killed %ebx def %ebx -; AVX512F-32-NEXT: shrb $7, %bl -; AVX512F-32-NEXT: kmovd %ebx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllq $56, %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $24, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastq %xmm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: shrl $16, %eax +; AVX512F-32-NEXT: shrb $3, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 +; AVX512F-32-NEXT: kshiftrq $27, %k4, %k4 +; AVX512F-32-NEXT: kxorq %k5, %k4, %k5 +; AVX512F-32-NEXT: kshiftrq $37, %k5, %k4 +; AVX512F-32-NEXT: kxorq %k0, %k4, %k0 +; AVX512F-32-NEXT: kmovd %ecx, %k4 +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrl $13, %ecx +; AVX512F-32-NEXT: andb $1, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 +; AVX512F-32-NEXT: kshiftrq $26, %k0, %k0 +; AVX512F-32-NEXT: kxorq %k5, %k0, %k0 +; AVX512F-32-NEXT: kshiftrq $38, %k0, %k5 +; AVX512F-32-NEXT: kxorq %k7, %k5, %k7 +; AVX512F-32-NEXT: kmovd %ecx, %k5 ; AVX512F-32-NEXT: movl %eax, %edx ; AVX512F-32-NEXT: andb $2, %dl ; AVX512F-32-NEXT: shrb %dl +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $25, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k0, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $39, %k7, %k0 +; AVX512F-32-NEXT: kxorq %k6, %k0, %k6 +; AVX512F-32-NEXT: kmovd %edx, %k0 +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) # 8-byte Spill +; AVX512F-32-NEXT: movl %eax, %ecx +; AVX512F-32-NEXT: andb $15, %cl +; AVX512F-32-NEXT: movl %ecx, %edx +; AVX512F-32-NEXT: 
shrb $2, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 +; AVX512F-32-NEXT: kshiftrq $24, %k6, %k6 +; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 +; AVX512F-32-NEXT: kshiftrq $40, %k6, %k7 +; AVX512F-32-NEXT: kxorq %k1, %k7, %k7 +; AVX512F-32-NEXT: kmovd %ecx, %k1 +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrl $12, %ecx +; AVX512F-32-NEXT: andl $15, %ecx +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $23, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k6, %k7, %k6 +; AVX512F-32-NEXT: kshiftrq $41, %k6, %k7 +; AVX512F-32-NEXT: kxorq %k2, %k7, %k2 +; AVX512F-32-NEXT: kmovd %ecx, %k0 +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrl $14, %ecx +; AVX512F-32-NEXT: andl $3, %ecx +; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 +; AVX512F-32-NEXT: kshiftrq $22, %k2, %k2 +; AVX512F-32-NEXT: kxorq %k6, %k2, %k2 +; AVX512F-32-NEXT: kshiftrq $42, %k2, %k6 +; AVX512F-32-NEXT: kxorq %k3, %k6, %k3 +; AVX512F-32-NEXT: kmovd %ecx, %k7 +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrl $15, %ecx +; AVX512F-32-NEXT: andl $1, %ecx +; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3 +; AVX512F-32-NEXT: kshiftrq $21, %k3, %k3 +; AVX512F-32-NEXT: kxorq %k2, %k3, %k2 +; AVX512F-32-NEXT: kshiftrq $43, %k2, %k3 +; AVX512F-32-NEXT: kxorq %k4, %k3, %k3 +; AVX512F-32-NEXT: kmovd %ecx, %k6 +; AVX512F-32-NEXT: shrb $3, %dl +; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3 +; AVX512F-32-NEXT: kshiftrq $20, %k3, %k3 +; AVX512F-32-NEXT: kxorq %k2, %k3, %k3 +; AVX512F-32-NEXT: kshiftrq $44, %k3, %k2 +; AVX512F-32-NEXT: kxorq %k0, %k2, %k0 +; AVX512F-32-NEXT: kmovd %edx, %k2 +; AVX512F-32-NEXT: movl %eax, %ecx +; AVX512F-32-NEXT: shrb $4, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 +; AVX512F-32-NEXT: kshiftrq $19, %k0, %k0 +; AVX512F-32-NEXT: kxorq %k3, %k0, %k0 +; AVX512F-32-NEXT: kshiftrq $45, %k0, %k3 +; AVX512F-32-NEXT: kxorq %k5, %k3, %k4 +; AVX512F-32-NEXT: kmovd %ecx, %k3 +; AVX512F-32-NEXT: movl %eax, %ecx +; AVX512F-32-NEXT: shrb $5, %cl +; AVX512F-32-NEXT: andb $1, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 +; AVX512F-32-NEXT: kshiftrq $18, %k4, %k4 +; AVX512F-32-NEXT: kxorq %k0, %k4, %k0 +; AVX512F-32-NEXT: kshiftrq $46, %k0, %k4 +; AVX512F-32-NEXT: kxorq %k7, %k4, %k5 +; AVX512F-32-NEXT: kmovd %ecx, %k4 +; AVX512F-32-NEXT: movl %eax, %ecx +; AVX512F-32-NEXT: shrb $6, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5 +; AVX512F-32-NEXT: kshiftrq $17, %k5, %k5 +; AVX512F-32-NEXT: kxorq %k0, %k5, %k0 +; AVX512F-32-NEXT: kshiftrq $47, %k0, %k5 +; AVX512F-32-NEXT: kxorq %k6, %k5, %k5 +; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5 +; AVX512F-32-NEXT: kshiftrq $16, %k5, %k5 +; AVX512F-32-NEXT: kxorq %k0, %k5, %k0 +; AVX512F-32-NEXT: kshiftrq $48, %k0, %k5 +; AVX512F-32-NEXT: kmovd %eax, %k6 +; AVX512F-32-NEXT: kxorq %k6, %k5, %k6 +; AVX512F-32-NEXT: kmovd %ecx, %k5 +; AVX512F-32-NEXT: movl %ebx, %edx +; AVX512F-32-NEXT: shrl $24, %edx +; AVX512F-32-NEXT: # kill: def %al killed %al killed %eax def %eax +; AVX512F-32-NEXT: shrb $7, %al +; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 +; AVX512F-32-NEXT: kshiftrq $15, %k6, %k6 +; AVX512F-32-NEXT: kxorq %k0, %k6, %k6 +; AVX512F-32-NEXT: kshiftrq $49, %k6, %k0 +; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k7 # 8-byte Reload +; AVX512F-32-NEXT: kxorq %k7, %k0, %k7 +; AVX512F-32-NEXT: kmovd %eax, %k0 +; AVX512F-32-NEXT: movl %edx, %eax +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $14, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k6, %k7, %k6 +; AVX512F-32-NEXT: kshiftrq $50, %k6, %k7 +; AVX512F-32-NEXT: kxorq %k1, %k7, %k7 ; AVX512F-32-NEXT: 
kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: movl %eax, %edx +; AVX512F-32-NEXT: # kill: def %dl killed %dl killed %edx def %edx ; AVX512F-32-NEXT: andb $15, %dl +; AVX512F-32-NEXT: andb $2, %al +; AVX512F-32-NEXT: shrb %al +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $13, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k6, %k7, %k6 +; AVX512F-32-NEXT: kshiftrq $51, %k6, %k7 +; AVX512F-32-NEXT: kxorq %k2, %k7, %k7 +; AVX512F-32-NEXT: kmovd %eax, %k2 ; AVX512F-32-NEXT: movl %edx, %eax ; AVX512F-32-NEXT: shrb $2, %dl -; AVX512F-32-NEXT: kmovd %edx, %k0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $12, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k6, %k7, %k6 +; AVX512F-32-NEXT: kshiftrq $52, %k6, %k7 +; AVX512F-32-NEXT: kxorq %k3, %k7, %k7 +; AVX512F-32-NEXT: kmovd %edx, %k3 ; AVX512F-32-NEXT: shrb $3, %al -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4] -; AVX512F-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm4 -; AVX512F-32-NEXT: vpblendvb %ymm3, %ymm4, %ymm1, %ymm1 -; AVX512F-32-NEXT: movl %ecx, %eax +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $11, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k6, %k7, %k6 +; AVX512F-32-NEXT: kshiftrq $53, %k6, %k7 +; AVX512F-32-NEXT: kxorq %k4, %k7, %k7 +; AVX512F-32-NEXT: kmovd %eax, %k4 +; AVX512F-32-NEXT: movl %ebx, %eax ; AVX512F-32-NEXT: shrl $29, %eax ; AVX512F-32-NEXT: andb $1, %al -; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $10, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k6, %k7, %k6 +; AVX512F-32-NEXT: kshiftrq $54, %k6, %k7 +; AVX512F-32-NEXT: kxorq %k5, %k7, %k7 +; AVX512F-32-NEXT: kmovd %eax, %k5 +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $9, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k6, %k7, %k6 +; AVX512F-32-NEXT: kshiftrq $55, %k6, %k7 +; AVX512F-32-NEXT: kxorq %k0, %k7, %k0 +; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 +; AVX512F-32-NEXT: kshiftrq $8, %k0, %k0 +; AVX512F-32-NEXT: kxorq %k6, %k0, %k0 +; AVX512F-32-NEXT: kshiftrq $56, %k0, %k6 +; AVX512F-32-NEXT: kxorq %k1, %k6, %k1 +; AVX512F-32-NEXT: 
kshiftlq $63, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $7, %k1, %k1 +; AVX512F-32-NEXT: kxorq %k0, %k1, %k0 +; AVX512F-32-NEXT: kshiftrq $57, %k0, %k1 +; AVX512F-32-NEXT: kxorq %k2, %k1, %k1 +; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $6, %k1, %k1 +; AVX512F-32-NEXT: kxorq %k0, %k1, %k0 +; AVX512F-32-NEXT: kshiftrq $58, %k0, %k1 +; AVX512F-32-NEXT: kxorq %k3, %k1, %k1 +; AVX512F-32-NEXT: movl %ebx, %eax ; AVX512F-32-NEXT: shrl $28, %eax -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 -; AVX512F-32-NEXT: vpbroadcastd %xmm3, %xmm3 -; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm3, %ymm1 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2] -; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm3, %ymm1 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $30, %eax -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vpbroadcastw %xmm3, %xmm3 -; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 -; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm1, %ymm3, %ymm1 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax +; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $5, %k1, %k1 +; AVX512F-32-NEXT: kxorq %k0, %k1, %k0 +; AVX512F-32-NEXT: kshiftrq $59, %k0, %k1 +; AVX512F-32-NEXT: kxorq %k4, %k1, %k1 +; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $4, %k1, %k1 +; AVX512F-32-NEXT: kxorq %k0, %k1, %k0 +; AVX512F-32-NEXT: kshiftrq $60, %k0, %k1 +; AVX512F-32-NEXT: kmovd %eax, %k2 +; AVX512F-32-NEXT: kxorq %k2, %k1, %k1 +; AVX512F-32-NEXT: movl %ebx, %eax ; AVX512F-32-NEXT: shrl $31, %eax +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrl $30, %ecx +; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $3, %k1, %k1 +; AVX512F-32-NEXT: kxorq %k0, %k1, %k0 +; AVX512F-32-NEXT: kshiftrq $61, %k0, %k1 +; AVX512F-32-NEXT: kxorq %k5, %k1, %k1 +; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $2, %k1, %k1 +; AVX512F-32-NEXT: kxorq %k0, %k1, %k0 +; AVX512F-32-NEXT: kshiftrq $62, %k0, %k1 +; AVX512F-32-NEXT: kmovd %ecx, %k2 +; AVX512F-32-NEXT: kxorq %k2, %k1, %k1 +; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $1, %k1, %k1 +; AVX512F-32-NEXT: kxorq %k0, %k1, %k0 ; AVX512F-32-NEXT: kshiftlq $1, %k0, %k0 ; AVX512F-32-NEXT: 
kshiftrq $1, %k0, %k0 ; AVX512F-32-NEXT: kmovd %eax, %k1 ; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 ; AVX512F-32-NEXT: korq %k1, %k0, %k1 -; AVX512F-32-NEXT: vpcmpeqb %zmm6, %zmm5, %k0 {%k1} -; AVX512F-32-NEXT: vpcmpgtb %zmm5, %zmm6, %k2 {%k1} -; AVX512F-32-NEXT: vpcmpleb %zmm6, %zmm5, %k3 {%k1} -; AVX512F-32-NEXT: vpcmpneqb %zmm6, %zmm5, %k4 {%k1} -; AVX512F-32-NEXT: vpcmpleb %zmm5, %zmm6, %k5 {%k1} -; AVX512F-32-NEXT: vpcmpgtb %zmm6, %zmm5, %k1 {%k1} +; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: kmovq %k0, (%esp) ; AVX512F-32-NEXT: movl (%esp), %eax ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx -; AVX512F-32-NEXT: kmovq %k2, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx -; AVX512F-32-NEXT: kmovq %k3, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: kxorq %k0, %k0, %k0 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: orl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: orl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovq %k4, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx -; AVX512F-32-NEXT: kmovq %k5, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: vpcmpleb %zmm0, %zmm1, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx -; AVX512F-32-NEXT: kmovq %k1, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: addl %esi, %eax -; AVX512F-32-NEXT: adcl %ecx, %edx -; AVX512F-32-NEXT: addl $60, %esp +; AVX512F-32-NEXT: adcl %ebx, %edx +; AVX512F-32-NEXT: addl $68, %esp ; AVX512F-32-NEXT: popl %esi ; AVX512F-32-NEXT: popl %ebx ; AVX512F-32-NEXT: vzeroupper @@ -2679,753 +2500,574 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: .cfi_def_cfa_offset 8 ; AVX512F-32-NEXT: pushl %esi ; AVX512F-32-NEXT: .cfi_def_cfa_offset 12 -; AVX512F-32-NEXT: subl $60, %esp -; AVX512F-32-NEXT: .cfi_def_cfa_offset 72 +; AVX512F-32-NEXT: subl $68, %esp +; AVX512F-32-NEXT: .cfi_def_cfa_offset 80 ; AVX512F-32-NEXT: .cfi_offset %esi, -12 ; AVX512F-32-NEXT: .cfi_offset %ebx, -8 -; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm6 -; AVX512F-32-NEXT: vmovdqa64 %zmm0, %zmm5 -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrb $5, %al -; AVX512F-32-NEXT: andb $1, %al -; AVX512F-32-NEXT: movl %ecx, %ebx -; AVX512F-32-NEXT: andb $15, %bl -; AVX512F-32-NEXT: movl %ecx, %edx -; AVX512F-32-NEXT: andb $2, %dl -; AVX512F-32-NEXT: shrb %dl -; AVX512F-32-NEXT: kmovd %edx, %k0 +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; AVX512F-32-NEXT: movl %ebx, %eax +; AVX512F-32-NEXT: shrl $16, %eax ; AVX512F-32-NEXT: movl %ebx, %edx -; AVX512F-32-NEXT: shrb $2, %bl -; AVX512F-32-NEXT: kmovd %ebx, %k1 -; AVX512F-32-NEXT: movl %ecx, %ebx -; AVX512F-32-NEXT: shrb $4, %bl -; AVX512F-32-NEXT: shrb $3, %dl -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; 
AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2 -; AVX512F-32-NEXT: kmovd %ecx, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 -; AVX512F-32-NEXT: vpbroadcastw %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; AVX512F-32-NEXT: kmovd %edx, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vpslld $24, %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; AVX512F-32-NEXT: kmovd %ebx, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vpbroadcastd %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vpsllq $40, %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrb $6, %al -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrb $7, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = 
zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movb %ch, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastq %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: andb $2, %al -; AVX512F-32-NEXT: shrb %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movb %ch, %dl ; AVX512F-32-NEXT: andb $15, %dl -; AVX512F-32-NEXT: movl %edx, %eax +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: andb $2, %cl +; AVX512F-32-NEXT: shrb %cl +; AVX512F-32-NEXT: kmovd %ecx, %k1 +; AVX512F-32-NEXT: movl %edx, %ecx ; AVX512F-32-NEXT: shrb $2, %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: shrb $3, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $12, %eax -; AVX512F-32-NEXT: andl $15, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $13, %eax -; AVX512F-32-NEXT: andb $1, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $14, %eax -; AVX512F-32-NEXT: andl $3, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $15, %eax -; AVX512F-32-NEXT: andl $1, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $16, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: kmovd %edx, %k2 +; AVX512F-32-NEXT: movb %bh, %dl +; AVX512F-32-NEXT: andb $15, %dl +; AVX512F-32-NEXT: shrb $3, %cl +; AVX512F-32-NEXT: kmovd %ecx, %k0 +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrb $4, %cl +; AVX512F-32-NEXT: kmovd %ecx, %k3 +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrb $5, %cl +; AVX512F-32-NEXT: andb $1, %cl +; AVX512F-32-NEXT: kmovd %ecx, %k4 +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrb $6, %cl +; AVX512F-32-NEXT: kmovd %ecx, %k6 +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrb $7, %cl +; AVX512F-32-NEXT: kmovd %ebx, %k5 +; AVX512F-32-NEXT: kshiftrq $1, %k5, %k7 +; AVX512F-32-NEXT: kxorq %k1, %k7, %k1 +; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $62, %k1, %k1 +; AVX512F-32-NEXT: kxorq %k5, %k1, %k7 +; AVX512F-32-NEXT: kshiftrq $2, %k7, %k1 +; AVX512F-32-NEXT: kxorq %k2, %k1, %k2 +; AVX512F-32-NEXT: kmovd %ecx, %k5 +; AVX512F-32-NEXT: movb %bh, %cl +; AVX512F-32-NEXT: kmovd %ecx, %k1 +; AVX512F-32-NEXT: movl %ebx, %esi +; AVX512F-32-NEXT: andb $2, %cl +; AVX512F-32-NEXT: shrb %cl +; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 +; AVX512F-32-NEXT: kshiftrq $61, %k2, 
%k2 +; AVX512F-32-NEXT: kxorq %k7, %k2, %k7 +; AVX512F-32-NEXT: kshiftrq $3, %k7, %k2 +; AVX512F-32-NEXT: kxorq %k0, %k2, %k0 +; AVX512F-32-NEXT: kmovd %ecx, %k2 +; AVX512F-32-NEXT: movl %edx, %ecx +; AVX512F-32-NEXT: shrb $2, %dl +; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 +; AVX512F-32-NEXT: kshiftrq $60, %k0, %k0 +; AVX512F-32-NEXT: kxorq %k7, %k0, %k0 +; AVX512F-32-NEXT: kshiftrq $4, %k0, %k7 +; AVX512F-32-NEXT: kxorq %k3, %k7, %k7 +; AVX512F-32-NEXT: kmovd %edx, %k3 ; AVX512F-32-NEXT: movl %eax, %edx -; AVX512F-32-NEXT: andb $2, %dl -; AVX512F-32-NEXT: shrb %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: andb $15, %dl +; AVX512F-32-NEXT: shrb $3, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $59, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k0, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $5, %k7, %k0 +; AVX512F-32-NEXT: kxorq %k4, %k0, %k4 +; AVX512F-32-NEXT: kmovd %ecx, %k0 +; AVX512F-32-NEXT: movl %esi, %ecx +; AVX512F-32-NEXT: shrl $13, %ecx +; AVX512F-32-NEXT: andb $1, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 +; AVX512F-32-NEXT: kshiftrq $58, %k4, %k4 +; AVX512F-32-NEXT: kxorq %k7, %k4, %k7 +; AVX512F-32-NEXT: kshiftrq $6, %k7, %k4 +; AVX512F-32-NEXT: kxorq %k6, %k4, %k6 +; AVX512F-32-NEXT: kmovd %ecx, %k4 ; AVX512F-32-NEXT: movl %eax, %ebx -; AVX512F-32-NEXT: andb $15, %bl -; AVX512F-32-NEXT: movl %ebx, %edx -; AVX512F-32-NEXT: shrb $2, %bl -; AVX512F-32-NEXT: kmovd %ebx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: shrb $3, %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslld $24, %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %eax, %edx -; AVX512F-32-NEXT: shrb $4, %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] -; 
AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %eax, %edx -; AVX512F-32-NEXT: shrb $5, %dl -; AVX512F-32-NEXT: andb $1, %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: andb $2, %bl +; AVX512F-32-NEXT: shrb %bl +; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 +; AVX512F-32-NEXT: kshiftrq $57, %k6, %k6 +; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 +; AVX512F-32-NEXT: kshiftrq $7, %k6, %k7 +; AVX512F-32-NEXT: kxorq %k5, %k7, %k7 +; AVX512F-32-NEXT: kmovd %ebx, %k5 +; AVX512F-32-NEXT: movl %edx, %ecx +; AVX512F-32-NEXT: shrb $2, %dl +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $56, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k6, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $8, %k7, %k6 +; AVX512F-32-NEXT: kxorq %k1, %k6, %k1 +; AVX512F-32-NEXT: kmovd %edx, %k6 +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; AVX512F-32-NEXT: shrb $3, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $55, %k1, %k1 +; AVX512F-32-NEXT: kxorq %k7, %k1, %k7 +; AVX512F-32-NEXT: kshiftrq $9, %k7, %k1 +; AVX512F-32-NEXT: kxorq %k2, %k1, %k2 +; AVX512F-32-NEXT: kmovd %ecx, %k1 +; AVX512F-32-NEXT: movl %eax, %ecx +; AVX512F-32-NEXT: shrb $4, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 +; AVX512F-32-NEXT: kshiftrq $54, %k2, %k2 +; AVX512F-32-NEXT: kxorq %k7, %k2, %k7 +; AVX512F-32-NEXT: kshiftrq $10, %k7, %k2 +; AVX512F-32-NEXT: kxorq %k3, %k2, %k3 +; AVX512F-32-NEXT: kmovd %ecx, %k2 +; AVX512F-32-NEXT: movl %eax, %ecx +; AVX512F-32-NEXT: shrb $5, %cl +; AVX512F-32-NEXT: andb $1, %cl +; AVX512F-32-NEXT: movl %esi, %edx +; AVX512F-32-NEXT: shrl $12, %edx +; AVX512F-32-NEXT: andl $15, %edx +; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3 +; AVX512F-32-NEXT: kshiftrq $53, %k3, %k3 +; AVX512F-32-NEXT: kxorq %k7, %k3, %k3 +; AVX512F-32-NEXT: kshiftrq $11, %k3, %k7 +; AVX512F-32-NEXT: kxorq %k0, %k7, %k0 +; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 +; AVX512F-32-NEXT: kshiftrq $52, %k0, %k0 +; AVX512F-32-NEXT: kxorq %k3, %k0, %k3 +; AVX512F-32-NEXT: kshiftrq $12, %k3, %k0 +; AVX512F-32-NEXT: kmovd %edx, %k7 +; AVX512F-32-NEXT: kxorq %k7, %k0, %k7 +; AVX512F-32-NEXT: kmovd %ecx, %k0 ; AVX512F-32-NEXT: movl %eax, %edx ; AVX512F-32-NEXT: shrb $6, %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: movl %esi, %ecx +; AVX512F-32-NEXT: shrl $14, %ecx +; AVX512F-32-NEXT: andl $3, %ecx +; AVX512F-32-NEXT: kshiftlq 
$63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $51, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k3, %k7, %k3 +; AVX512F-32-NEXT: kshiftrq $13, %k3, %k7 +; AVX512F-32-NEXT: kxorq %k4, %k7, %k4 +; AVX512F-32-NEXT: kmovd %ecx, %k7 +; AVX512F-32-NEXT: movl %esi, %ecx +; AVX512F-32-NEXT: shrl $15, %ecx +; AVX512F-32-NEXT: andl $1, %ecx +; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 +; AVX512F-32-NEXT: kshiftrq $50, %k4, %k4 +; AVX512F-32-NEXT: kxorq %k3, %k4, %k3 +; AVX512F-32-NEXT: kshiftrq $14, %k3, %k4 +; AVX512F-32-NEXT: kxorq %k7, %k4, %k4 +; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 +; AVX512F-32-NEXT: kshiftrq $49, %k4, %k4 +; AVX512F-32-NEXT: kxorq %k3, %k4, %k3 +; AVX512F-32-NEXT: kshiftrq $15, %k3, %k4 +; AVX512F-32-NEXT: kmovd %ecx, %k7 +; AVX512F-32-NEXT: kxorq %k7, %k4, %k4 +; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 +; AVX512F-32-NEXT: kshiftrq $48, %k4, %k4 +; AVX512F-32-NEXT: kxorq %k3, %k4, %k3 +; AVX512F-32-NEXT: kshiftrq $16, %k3, %k4 +; AVX512F-32-NEXT: kmovd %eax, %k7 +; AVX512F-32-NEXT: kxorq %k7, %k4, %k4 +; AVX512F-32-NEXT: kmovd %edx, %k7 +; AVX512F-32-NEXT: movl %esi, %edx +; AVX512F-32-NEXT: shrl $24, %edx ; AVX512F-32-NEXT: # kill: def %al killed %al killed %eax def %eax ; AVX512F-32-NEXT: shrb $7, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $24, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastq %xmm2, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %eax, %edx -; AVX512F-32-NEXT: andb $2, %dl -; AVX512F-32-NEXT: shrb %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6] -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %eax, %edx -; AVX512F-32-NEXT: andb $15, %dl +; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 +; AVX512F-32-NEXT: kshiftrq $47, %k4, %k4 +; AVX512F-32-NEXT: kxorq %k3, %k4, %k3 +; AVX512F-32-NEXT: kshiftrq $17, %k3, %k4 +; AVX512F-32-NEXT: kxorq %k5, %k4, %k4 +; AVX512F-32-NEXT: kmovd %eax, %k5 ; AVX512F-32-NEXT: movl %edx, %eax -; AVX512F-32-NEXT: shrb $2, %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; 
AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: shrb $3, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4] -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm3, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm4[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $28, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm4[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: movl %ecx, %esi -; AVX512F-32-NEXT: shrl $29, %eax -; AVX512F-32-NEXT: andb $1, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2] -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm2[0,1,2,3],zmm0[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %esi, %eax -; AVX512F-32-NEXT: shrl $30, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] -; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %esi, %eax -; AVX512F-32-NEXT: shrl $31, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, 
%zmm1 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm1, %ymm0, %ymm0 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; AVX512F-32-NEXT: kmovd %ecx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm7 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm7, %ymm1 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: andb $2, %al -; AVX512F-32-NEXT: shrb %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllw $8, %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %edx +; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 +; AVX512F-32-NEXT: kshiftrq $46, %k4, %k4 +; AVX512F-32-NEXT: kxorq %k3, %k4, %k4 +; AVX512F-32-NEXT: kshiftrq $18, %k4, %k3 +; AVX512F-32-NEXT: kxorq %k6, %k3, %k6 +; AVX512F-32-NEXT: kmovd %edx, %k3 +; AVX512F-32-NEXT: # kill: def %dl killed %dl killed %edx def %edx ; AVX512F-32-NEXT: andb $15, %dl -; AVX512F-32-NEXT: movl %edx, %eax -; AVX512F-32-NEXT: shrb $2, %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: shrb $3, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslld $24, %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrb $4, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastd %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; 
AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrb $5, %al -; AVX512F-32-NEXT: andb $1, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllq $40, %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrb $6, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrb $7, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllq $56, %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movb %ch, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastq %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: andb $2, %al ; AVX512F-32-NEXT: shrb %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movb %ch, %dl -; AVX512F-32-NEXT: andb $15, %dl -; AVX512F-32-NEXT: movl %edx, %eax +; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 +; AVX512F-32-NEXT: kshiftrq $45, %k6, %k6 +; AVX512F-32-NEXT: kxorq %k4, %k6, %k6 +; AVX512F-32-NEXT: kshiftrq $19, %k6, %k4 +; AVX512F-32-NEXT: kxorq %k1, %k4, %k1 
+; AVX512F-32-NEXT: kmovd %eax, %k4 +; AVX512F-32-NEXT: movl %edx, %ecx ; AVX512F-32-NEXT: shrb $2, %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: shrb $3, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $12, %eax -; AVX512F-32-NEXT: andl $15, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastd %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $13, %eax -; AVX512F-32-NEXT: andb $1, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $14, %eax -; AVX512F-32-NEXT: andl $3, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $15, %eax -; AVX512F-32-NEXT: andl $1, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %ebx -; AVX512F-32-NEXT: shrl $16, %ebx -; AVX512F-32-NEXT: kmovd %ebx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $44, %k1, %k1 +; AVX512F-32-NEXT: kxorq %k6, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $20, %k1, %k6 +; AVX512F-32-NEXT: kxorq %k2, %k6, %k6 +; AVX512F-32-NEXT: kmovd %edx, %k2 +; AVX512F-32-NEXT: movl %ebx, %eax +; AVX512F-32-NEXT: andb $15, %al +; AVX512F-32-NEXT: shrb $3, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 +; AVX512F-32-NEXT: kshiftrq $43, %k6, %k6 +; AVX512F-32-NEXT: kxorq %k1, %k6, %k1 +; AVX512F-32-NEXT: kshiftrq $21, %k1, %k6 +; AVX512F-32-NEXT: kxorq %k0, %k6, %k6 +; AVX512F-32-NEXT: kmovd %ecx, %k0 +; AVX512F-32-NEXT: movl %esi, %ecx +; AVX512F-32-NEXT: shrl $29, %ecx +; AVX512F-32-NEXT: andb $1, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 +; AVX512F-32-NEXT: kshiftrq $42, %k6, %k6 +; AVX512F-32-NEXT: kxorq %k1, %k6, %k6 +; AVX512F-32-NEXT: kshiftrq $22, %k6, %k1 +; AVX512F-32-NEXT: kxorq %k7, %k1, %k7 +; AVX512F-32-NEXT: kmovd %ecx, %k1 ; AVX512F-32-NEXT: movl %ebx, %edx ; AVX512F-32-NEXT: andb $2, %dl ; AVX512F-32-NEXT: shrb %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllw $8, %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: movl %ebx, %eax +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $41, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k6, %k7, %k6 +; AVX512F-32-NEXT: kshiftrq $23, %k6, %k7 +; AVX512F-32-NEXT: kxorq %k5, %k7, %k7 +; AVX512F-32-NEXT: kmovd %edx, %k5 +; AVX512F-32-NEXT: movl %eax, %ecx +; AVX512F-32-NEXT: shrb $2, %al +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $40, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k6, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $24, %k7, %k6 +; AVX512F-32-NEXT: kxorq %k3, %k6, %k3 +; AVX512F-32-NEXT: kmovd %eax, %k6 +; AVX512F-32-NEXT: movb %bh, %al ; AVX512F-32-NEXT: andb $15, %al -; AVX512F-32-NEXT: movl %eax, %edx +; AVX512F-32-NEXT: shrb $3, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3 +; AVX512F-32-NEXT: kshiftrq $39, %k3, %k3 +; AVX512F-32-NEXT: kxorq 
%k7, %k3, %k7 +; AVX512F-32-NEXT: kshiftrq $25, %k7, %k3 +; AVX512F-32-NEXT: kxorq %k4, %k3, %k4 +; AVX512F-32-NEXT: kmovd %ecx, %k3 +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrb $4, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 +; AVX512F-32-NEXT: kshiftrq $38, %k4, %k4 +; AVX512F-32-NEXT: kxorq %k7, %k4, %k7 +; AVX512F-32-NEXT: kshiftrq $26, %k7, %k4 +; AVX512F-32-NEXT: kxorq %k2, %k4, %k2 +; AVX512F-32-NEXT: kmovd %ecx, %k4 +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrb $5, %cl +; AVX512F-32-NEXT: andb $1, %cl +; AVX512F-32-NEXT: movl %esi, %edx +; AVX512F-32-NEXT: shrl $28, %edx +; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 +; AVX512F-32-NEXT: kshiftrq $37, %k2, %k2 +; AVX512F-32-NEXT: kxorq %k7, %k2, %k2 +; AVX512F-32-NEXT: kshiftrq $27, %k2, %k7 +; AVX512F-32-NEXT: kxorq %k0, %k7, %k0 +; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 +; AVX512F-32-NEXT: kshiftrq $36, %k0, %k0 +; AVX512F-32-NEXT: kxorq %k2, %k0, %k2 +; AVX512F-32-NEXT: kshiftrq $28, %k2, %k0 +; AVX512F-32-NEXT: kmovd %edx, %k7 +; AVX512F-32-NEXT: kxorq %k7, %k0, %k7 +; AVX512F-32-NEXT: kmovd %ecx, %k0 +; AVX512F-32-NEXT: movl %ebx, %edx +; AVX512F-32-NEXT: shrb $6, %dl +; AVX512F-32-NEXT: movl %esi, %ecx +; AVX512F-32-NEXT: shrl $30, %ecx +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $35, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k2, %k7, %k2 +; AVX512F-32-NEXT: kshiftrq $29, %k2, %k7 +; AVX512F-32-NEXT: kxorq %k1, %k7, %k1 +; AVX512F-32-NEXT: kmovd %ecx, %k7 +; AVX512F-32-NEXT: movl %esi, %ecx +; AVX512F-32-NEXT: shrl $31, %ecx +; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $34, %k1, %k1 +; AVX512F-32-NEXT: kxorq %k2, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $30, %k1, %k2 +; AVX512F-32-NEXT: kxorq %k7, %k2, %k2 +; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 +; AVX512F-32-NEXT: kshiftrq $33, %k2, %k2 +; AVX512F-32-NEXT: kxorq %k1, %k2, %k1 +; AVX512F-32-NEXT: kshiftrq $31, %k1, %k2 +; AVX512F-32-NEXT: kmovd %ecx, %k7 +; AVX512F-32-NEXT: kxorq %k7, %k2, %k2 +; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 +; AVX512F-32-NEXT: kshiftrq $32, %k2, %k2 +; AVX512F-32-NEXT: kxorq %k1, %k2, %k1 +; AVX512F-32-NEXT: kshiftrq $32, %k1, %k2 +; AVX512F-32-NEXT: kmovd %ebx, %k7 +; AVX512F-32-NEXT: kxorq %k7, %k2, %k2 +; AVX512F-32-NEXT: kmovd %edx, %k7 +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrb $7, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 +; AVX512F-32-NEXT: kshiftrq $31, %k2, %k2 +; AVX512F-32-NEXT: kxorq %k1, %k2, %k1 +; AVX512F-32-NEXT: kshiftrq $33, %k1, %k2 +; AVX512F-32-NEXT: kxorq %k5, %k2, %k2 +; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 +; AVX512F-32-NEXT: kshiftrq $30, %k2, %k2 +; AVX512F-32-NEXT: kxorq %k1, %k2, %k2 +; AVX512F-32-NEXT: kshiftrq $34, %k2, %k1 +; AVX512F-32-NEXT: kxorq %k6, %k1, %k5 +; AVX512F-32-NEXT: kmovd %ecx, %k6 +; AVX512F-32-NEXT: movb %bh, %cl +; AVX512F-32-NEXT: kmovd %ecx, %k1 +; AVX512F-32-NEXT: andb $2, %cl +; AVX512F-32-NEXT: shrb %cl +; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5 +; AVX512F-32-NEXT: kshiftrq $29, %k5, %k5 +; AVX512F-32-NEXT: kxorq %k2, %k5, %k5 +; AVX512F-32-NEXT: kshiftrq $35, %k5, %k2 +; AVX512F-32-NEXT: kxorq %k3, %k2, %k3 +; AVX512F-32-NEXT: kmovd %ecx, %k2 +; AVX512F-32-NEXT: movl %eax, %ecx ; AVX512F-32-NEXT: shrb $2, %al -; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm1 -; AVX512F-32-NEXT: 
vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: shrb $3, %dl -; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslld $24, %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ebx, %eax -; AVX512F-32-NEXT: shrb $4, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastd %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3 +; AVX512F-32-NEXT: kshiftrq $28, %k3, %k3 +; AVX512F-32-NEXT: kxorq %k5, %k3, %k5 +; AVX512F-32-NEXT: kshiftrq $36, %k5, %k3 +; AVX512F-32-NEXT: kxorq %k4, %k3, %k4 +; AVX512F-32-NEXT: kmovd %eax, %k3 ; AVX512F-32-NEXT: movl %ebx, %eax -; AVX512F-32-NEXT: shrb $5, %al -; AVX512F-32-NEXT: andb $1, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllq $40, %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ebx, %eax -; AVX512F-32-NEXT: shrb $6, %al -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: # kill: def %bl killed %bl killed %ebx def %ebx -; AVX512F-32-NEXT: shrb $7, %bl -; AVX512F-32-NEXT: kmovd %ebx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllq $56, %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: 
vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $24, %eax -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastq %xmm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: shrl $16, %eax +; AVX512F-32-NEXT: shrb $3, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 +; AVX512F-32-NEXT: kshiftrq $27, %k4, %k4 +; AVX512F-32-NEXT: kxorq %k5, %k4, %k5 +; AVX512F-32-NEXT: kshiftrq $37, %k5, %k4 +; AVX512F-32-NEXT: kxorq %k0, %k4, %k0 +; AVX512F-32-NEXT: kmovd %ecx, %k4 +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrl $13, %ecx +; AVX512F-32-NEXT: andb $1, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 +; AVX512F-32-NEXT: kshiftrq $26, %k0, %k0 +; AVX512F-32-NEXT: kxorq %k5, %k0, %k0 +; AVX512F-32-NEXT: kshiftrq $38, %k0, %k5 +; AVX512F-32-NEXT: kxorq %k7, %k5, %k7 +; AVX512F-32-NEXT: kmovd %ecx, %k5 ; AVX512F-32-NEXT: movl %eax, %edx ; AVX512F-32-NEXT: andb $2, %dl ; AVX512F-32-NEXT: shrb %dl +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $25, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k0, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $39, %k7, %k0 +; AVX512F-32-NEXT: kxorq %k6, %k0, %k6 +; AVX512F-32-NEXT: kmovd %edx, %k0 +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) # 8-byte Spill +; AVX512F-32-NEXT: movl %eax, %ecx +; AVX512F-32-NEXT: andb $15, %cl +; AVX512F-32-NEXT: movl %ecx, %edx +; AVX512F-32-NEXT: shrb $2, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 +; AVX512F-32-NEXT: kshiftrq $24, %k6, %k6 +; AVX512F-32-NEXT: kxorq %k7, %k6, %k6 +; AVX512F-32-NEXT: kshiftrq $40, %k6, %k7 +; AVX512F-32-NEXT: kxorq %k1, %k7, %k7 +; AVX512F-32-NEXT: kmovd %ecx, %k1 +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrl $12, %ecx +; AVX512F-32-NEXT: andl $15, %ecx +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $23, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k6, %k7, %k6 +; AVX512F-32-NEXT: kshiftrq $41, %k6, %k7 +; AVX512F-32-NEXT: kxorq %k2, %k7, %k2 +; AVX512F-32-NEXT: kmovd %ecx, %k0 +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrl $14, %ecx +; AVX512F-32-NEXT: andl $3, %ecx +; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2 +; AVX512F-32-NEXT: kshiftrq $22, %k2, %k2 +; AVX512F-32-NEXT: kxorq %k6, %k2, %k2 +; AVX512F-32-NEXT: kshiftrq $42, %k2, %k6 +; AVX512F-32-NEXT: kxorq %k3, %k6, %k3 +; AVX512F-32-NEXT: kmovd %ecx, %k7 +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrl $15, %ecx +; AVX512F-32-NEXT: andl $1, %ecx +; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3 +; AVX512F-32-NEXT: kshiftrq $21, %k3, %k3 +; AVX512F-32-NEXT: kxorq %k2, %k3, %k2 +; AVX512F-32-NEXT: kshiftrq $43, %k2, %k3 +; AVX512F-32-NEXT: kxorq %k4, %k3, %k3 +; AVX512F-32-NEXT: kmovd %ecx, %k6 +; AVX512F-32-NEXT: shrb $3, %dl +; 
AVX512F-32-NEXT: kshiftlq $63, %k3, %k3 +; AVX512F-32-NEXT: kshiftrq $20, %k3, %k3 +; AVX512F-32-NEXT: kxorq %k2, %k3, %k3 +; AVX512F-32-NEXT: kshiftrq $44, %k3, %k2 +; AVX512F-32-NEXT: kxorq %k0, %k2, %k0 +; AVX512F-32-NEXT: kmovd %edx, %k2 +; AVX512F-32-NEXT: movl %eax, %ecx +; AVX512F-32-NEXT: shrb $4, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 +; AVX512F-32-NEXT: kshiftrq $19, %k0, %k0 +; AVX512F-32-NEXT: kxorq %k3, %k0, %k0 +; AVX512F-32-NEXT: kshiftrq $45, %k0, %k3 +; AVX512F-32-NEXT: kxorq %k5, %k3, %k4 +; AVX512F-32-NEXT: kmovd %ecx, %k3 +; AVX512F-32-NEXT: movl %eax, %ecx +; AVX512F-32-NEXT: shrb $5, %cl +; AVX512F-32-NEXT: andb $1, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4 +; AVX512F-32-NEXT: kshiftrq $18, %k4, %k4 +; AVX512F-32-NEXT: kxorq %k0, %k4, %k0 +; AVX512F-32-NEXT: kshiftrq $46, %k0, %k4 +; AVX512F-32-NEXT: kxorq %k7, %k4, %k5 +; AVX512F-32-NEXT: kmovd %ecx, %k4 +; AVX512F-32-NEXT: movl %eax, %ecx +; AVX512F-32-NEXT: shrb $6, %cl +; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5 +; AVX512F-32-NEXT: kshiftrq $17, %k5, %k5 +; AVX512F-32-NEXT: kxorq %k0, %k5, %k0 +; AVX512F-32-NEXT: kshiftrq $47, %k0, %k5 +; AVX512F-32-NEXT: kxorq %k6, %k5, %k5 +; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5 +; AVX512F-32-NEXT: kshiftrq $16, %k5, %k5 +; AVX512F-32-NEXT: kxorq %k0, %k5, %k0 +; AVX512F-32-NEXT: kshiftrq $48, %k0, %k5 +; AVX512F-32-NEXT: kmovd %eax, %k6 +; AVX512F-32-NEXT: kxorq %k6, %k5, %k6 +; AVX512F-32-NEXT: kmovd %ecx, %k5 +; AVX512F-32-NEXT: movl %ebx, %edx +; AVX512F-32-NEXT: shrl $24, %edx +; AVX512F-32-NEXT: # kill: def %al killed %al killed %eax def %eax +; AVX512F-32-NEXT: shrb $7, %al +; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6 +; AVX512F-32-NEXT: kshiftrq $15, %k6, %k6 +; AVX512F-32-NEXT: kxorq %k0, %k6, %k6 +; AVX512F-32-NEXT: kshiftrq $49, %k6, %k0 +; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k7 # 8-byte Reload +; AVX512F-32-NEXT: kxorq %k7, %k0, %k7 +; AVX512F-32-NEXT: kmovd %eax, %k0 +; AVX512F-32-NEXT: movl %edx, %eax +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $14, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k6, %k7, %k6 +; AVX512F-32-NEXT: kshiftrq $50, %k6, %k7 +; AVX512F-32-NEXT: kxorq %k1, %k7, %k7 ; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: movl %eax, %edx +; AVX512F-32-NEXT: # kill: def %dl killed %dl killed %edx def %edx ; AVX512F-32-NEXT: andb $15, %dl +; AVX512F-32-NEXT: andb $2, %al +; AVX512F-32-NEXT: shrb %al +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $13, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k6, %k7, %k6 +; AVX512F-32-NEXT: kshiftrq $51, %k6, %k7 +; AVX512F-32-NEXT: kxorq %k2, %k7, %k7 +; AVX512F-32-NEXT: kmovd %eax, %k2 ; AVX512F-32-NEXT: movl %edx, %eax ; AVX512F-32-NEXT: shrb $2, %dl -; AVX512F-32-NEXT: kmovd %edx, %k0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: 
vpmovm2b %k1, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $12, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k6, %k7, %k6 +; AVX512F-32-NEXT: kshiftrq $52, %k6, %k7 +; AVX512F-32-NEXT: kxorq %k3, %k7, %k7 +; AVX512F-32-NEXT: kmovd %edx, %k3 ; AVX512F-32-NEXT: shrb $3, %al -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4] -; AVX512F-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm4 -; AVX512F-32-NEXT: vpblendvb %ymm3, %ymm4, %ymm1, %ymm1 -; AVX512F-32-NEXT: movl %ecx, %eax +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $11, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k6, %k7, %k6 +; AVX512F-32-NEXT: kshiftrq $53, %k6, %k7 +; AVX512F-32-NEXT: kxorq %k4, %k7, %k7 +; AVX512F-32-NEXT: kmovd %eax, %k4 +; AVX512F-32-NEXT: movl %ebx, %eax ; AVX512F-32-NEXT: shrl $29, %eax ; AVX512F-32-NEXT: andb $1, %al -; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $10, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k6, %k7, %k6 +; AVX512F-32-NEXT: kshiftrq $54, %k6, %k7 +; AVX512F-32-NEXT: kxorq %k5, %k7, %k7 +; AVX512F-32-NEXT: kmovd %eax, %k5 +; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7 +; AVX512F-32-NEXT: kshiftrq $9, %k7, %k7 +; AVX512F-32-NEXT: kxorq %k6, %k7, %k6 +; AVX512F-32-NEXT: kshiftrq $55, %k6, %k7 +; AVX512F-32-NEXT: kxorq %k0, %k7, %k0 +; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0 +; AVX512F-32-NEXT: kshiftrq $8, %k0, %k0 +; AVX512F-32-NEXT: kxorq %k6, %k0, %k0 +; AVX512F-32-NEXT: kshiftrq $56, %k0, %k6 +; AVX512F-32-NEXT: kxorq %k1, %k6, %k1 +; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $7, %k1, %k1 +; AVX512F-32-NEXT: kxorq %k0, %k1, %k0 +; AVX512F-32-NEXT: kshiftrq $57, %k0, %k1 +; AVX512F-32-NEXT: kxorq %k2, %k1, %k1 +; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $6, %k1, %k1 +; AVX512F-32-NEXT: kxorq %k0, %k1, %k0 +; AVX512F-32-NEXT: kshiftrq $58, %k0, %k1 +; AVX512F-32-NEXT: kxorq %k3, %k1, %k1 +; AVX512F-32-NEXT: movl %ebx, %eax ; AVX512F-32-NEXT: shrl $28, %eax -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 -; AVX512F-32-NEXT: vpbroadcastd %xmm3, %xmm3 -; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm3, %ymm1 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm3 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2] -; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm3, %ymm1 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $30, %eax -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vpbroadcastw %xmm3, %xmm3 -; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 -; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm1, %ymm3, %ymm1 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax +; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $5, %k1, %k1 +; AVX512F-32-NEXT: kxorq %k0, %k1, %k0 +; AVX512F-32-NEXT: kshiftrq $59, %k0, %k1 +; AVX512F-32-NEXT: kxorq %k4, %k1, %k1 +; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $4, %k1, %k1 +; AVX512F-32-NEXT: kxorq %k0, %k1, %k0 +; AVX512F-32-NEXT: kshiftrq $60, %k0, %k1 +; AVX512F-32-NEXT: kmovd %eax, %k2 +; AVX512F-32-NEXT: kxorq %k2, %k1, %k1 +; AVX512F-32-NEXT: movl %ebx, %eax ; AVX512F-32-NEXT: shrl $31, %eax +; AVX512F-32-NEXT: movl %ebx, %ecx +; AVX512F-32-NEXT: shrl $30, %ecx +; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $3, %k1, %k1 +; AVX512F-32-NEXT: kxorq %k0, %k1, %k0 +; AVX512F-32-NEXT: kshiftrq $61, %k0, %k1 +; AVX512F-32-NEXT: kxorq %k5, %k1, %k1 +; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $2, %k1, %k1 +; AVX512F-32-NEXT: kxorq %k0, %k1, %k0 +; AVX512F-32-NEXT: kshiftrq $62, %k0, %k1 +; AVX512F-32-NEXT: kmovd %ecx, %k2 +; AVX512F-32-NEXT: kxorq %k2, %k1, %k1 +; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 +; AVX512F-32-NEXT: kshiftrq $1, %k1, %k1 +; AVX512F-32-NEXT: kxorq %k0, %k1, %k0 ; AVX512F-32-NEXT: kshiftlq $1, %k0, %k0 ; AVX512F-32-NEXT: kshiftrq $1, %k0, %k0 ; AVX512F-32-NEXT: kmovd %eax, %k1 ; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 ; AVX512F-32-NEXT: korq %k1, %k0, %k1 -; AVX512F-32-NEXT: vpcmpeqb %zmm6, %zmm5, %k0 {%k1} -; AVX512F-32-NEXT: vpcmpltub %zmm6, %zmm5, %k2 {%k1} -; AVX512F-32-NEXT: vpcmpleub %zmm6, %zmm5, %k3 {%k1} -; AVX512F-32-NEXT: vpcmpneqb %zmm6, %zmm5, %k4 {%k1} -; AVX512F-32-NEXT: vpcmpnltub %zmm6, %zmm5, %k5 {%k1} -; AVX512F-32-NEXT: vpcmpnleub %zmm6, %zmm5, %k1 {%k1} +; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: kmovq %k0, (%esp) ; AVX512F-32-NEXT: movl (%esp), %eax ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx -; AVX512F-32-NEXT: kmovq %k2, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx -; AVX512F-32-NEXT: kmovq %k3, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: kxorq %k0, %k0, %k0 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: orl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: orl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovq %k4, 
{{[0-9]+}}(%esp) +; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx -; AVX512F-32-NEXT: kmovq %k5, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx -; AVX512F-32-NEXT: kmovq %k1, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: addl %esi, %eax -; AVX512F-32-NEXT: adcl %ecx, %edx -; AVX512F-32-NEXT: addl $60, %esp +; AVX512F-32-NEXT: adcl %ebx, %edx +; AVX512F-32-NEXT: addl $68, %esp ; AVX512F-32-NEXT: popl %esi ; AVX512F-32-NEXT: popl %ebx ; AVX512F-32-NEXT: vzeroupper diff --git a/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll b/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll index 217ddb607b8..82dba2993e7 100644 --- a/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll +++ b/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll @@ -4207,39 +4207,35 @@ define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; 
NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -4262,39 +4258,35 @@ define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* % ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -4337,39 +4329,35 @@ define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, 
%zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -4414,39 +4402,35 @@ define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, 
%k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -4475,39 +4459,35 @@ define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -4552,39 +4532,35 @@ define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; 
NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -4613,39 +4589,35 @@ define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: 
vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -4668,39 +4640,35 @@ define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = 
[0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -4743,39 +4711,35 @@ define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -4820,39 +4784,35 @@ define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; 
NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -4881,39 +4841,35 @@ define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__ ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, 
%zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -4958,39 +4914,35 @@ define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, 
%zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -8521,23 +8473,21 @@ define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -8560,23 +8510,21 @@ define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* % ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -8611,23 +8559,21 @@ define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: 
kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -8664,23 +8610,21 @@ define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -8709,23 +8653,21 @@ define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # 
kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -8762,23 +8704,21 @@ define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -8807,23 +8747,21 @@ define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -8846,23 +8784,21 @@ define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 
{{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -8897,23 +8833,21 @@ define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -8950,23 +8884,21 @@ define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -8995,23 +8927,21 @@ define zeroext i16 
@test_vpcmpeqq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__ ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -9048,23 +8978,21 @@ define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -9727,36 +9655,33 @@ define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) ; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: 
vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -9784,36 +9709,33 @@ define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* % ; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: 
kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -9861,36 +9783,33 @@ define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -9940,36 +9859,33 @@ define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; 
NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -10003,36 +9919,33 @@ define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) ; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, 
%k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -10082,36 +9995,33 @@ define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -10145,36 +10055,33 @@ define zeroext 
i16 @test_vpcmpeqq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b ; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -10202,36 +10109,33 @@ define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* ; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, 
%zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -10279,36 +10183,33 @@ define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; 
NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -10358,36 +10259,33 @@ define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -10421,36 +10319,33 @@ define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__ ; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw 
%eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -10500,36 +10395,33 @@ define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, 
%k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -16550,39 +16442,35 @@ define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; 
NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -16605,39 +16493,35 @@ define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -16680,39 +16564,35 @@ define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i6 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: 
vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -16757,39 +16637,35 @@ define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: 
kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -16818,39 +16694,35 @@ define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -16895,39 +16767,35 @@ define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -16956,39 +16824,35 @@ define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__ ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, 
%k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -17011,39 +16875,35 @@ define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64> ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, 
%zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -17086,39 +16946,35 @@ define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -17163,39 +17019,35 @@ define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem(i8 zeroext %__u, < ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: 
kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -17224,39 +17076,35 @@ define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %_ ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw 
$1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -17301,39 +17149,35 @@ define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw 
%eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -20864,23 +20708,21 @@ define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -20903,23 +20745,21 @@ define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -20954,23 +20794,21 @@ define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i6 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: 
kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -21007,23 +20845,21 @@ define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -21052,23 +20888,21 @@ define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; 
NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -21105,23 +20939,21 @@ define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -21150,23 +20982,21 @@ define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__ ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -21189,23 +21019,21 @@ define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64> ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; 
NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -21240,23 +21068,21 @@ define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -21293,23 +21119,21 @@ define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem(i8 zeroext %__u, < ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -21338,23 +21162,21 @@ define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* 
%_ ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -21391,23 +21213,21 @@ define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -22070,36 +21890,33 @@ define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, 
%zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -22127,36 +21944,33 @@ define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* ; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax 
-; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -22204,36 +22018,33 @@ define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i6 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -22283,36 +22094,33 @@ define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; 
NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -22346,36 +22154,33 @@ define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: 
kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -22425,36 +22230,33 @@ define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -22488,36 +22290,33 @@ define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> 
%__ ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -22545,36 +22344,33 @@ define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64> ; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = 
[0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -22622,36 +22418,33 @@ define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; 
NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -22701,36 +22494,33 @@ define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem(i8 zeroext %__u, < ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -22764,36 +22554,33 @@ define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %_ ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: 
kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -22843,36 +22630,33 @@ define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: 
kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -28991,39 +28775,35 @@ define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax 
-; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -29049,39 +28829,35 @@ define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -29124,39 +28900,35 @@ define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i6 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 
{%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -29202,39 +28974,35 @@ define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: 
kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -29265,39 +29033,35 @@ define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -29342,39 +29106,35 @@ define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, 
%zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -29405,39 +29165,35 @@ define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__ ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: 
vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -29463,39 +29219,35 @@ define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64> ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -29538,39 +29290,35 @@ define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask(i8 zeroext %__u, <2 x ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -29616,39 +29364,35 @@ define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem(i8 zeroext %__u, < ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, 
%k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -29679,39 +29423,35 @@ define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %_ ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, 
%k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -29756,39 +29496,35 @@ define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, 
%k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -33345,23 +33081,21 @@ define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33387,23 +33121,21 @@ define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33438,23 +33170,21 @@ define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i6 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, 
%k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33492,23 +33222,21 @@ define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33539,23 +33267,21 @@ define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; 
NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33592,23 +33318,21 @@ define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33639,23 +33363,21 @@ define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__ ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33681,23 +33403,21 @@ define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64> ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = 
[0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33732,23 +33452,21 @@ define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33786,23 +33504,21 @@ define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem(i8 zeroext %__u, < ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33833,23 +33549,21 @@ define zeroext i16 
@test_vpcmpsgeq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %_ ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33886,23 +33600,21 @@ define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -34583,36 +34295,33 @@ define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: 
vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -34643,36 +34352,33 @@ define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: 
kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -34722,118 +34428,112 @@ define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i6 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper -; NoVLX-NEXT: retq -entry: - %0 = bitcast <4 x i64> %__a to <4 x i64> - %1 = bitcast <4 x i64> %__b to <4 x i64> - %2 = icmp sge <4 x i64> %0, %1 - %3 = bitcast i8 %__u to <8 x i1> - %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> - %4 = and <4 x i1> %2, %extract.i - %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> - %6 = bitcast <8 x i1> %5 to i8 - ret i8 %6 -} - -define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { -; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem: -; VLX: # %bb.0: # %entry -; VLX-NEXT: kmovd %edi, %k1 -; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1} -; VLX-NEXT: kmovd %k0, %eax -; VLX-NEXT: # kill: def %al killed %al killed %eax -; VLX-NEXT: vzeroupper -; VLX-NEXT: retq -; -; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem: -; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftlw $12, %k0, %k1 -; NoVLX-NEXT: kshiftrw $15, %k1, %k1 -; NoVLX-NEXT: kshiftlw $13, 
%k0, %k2 -; NoVLX-NEXT: kshiftrw $15, %k2, %k2 -; NoVLX-NEXT: kshiftlw $15, %k0, %k3 -; NoVLX-NEXT: kshiftrw $15, %k3, %k3 -; NoVLX-NEXT: kshiftlw $14, %k0, %k0 -; NoVLX-NEXT: kshiftrw $15, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: kmovw %k3, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kmovw %k2, %eax -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper +; NoVLX-NEXT: retq +entry: + %0 = bitcast <4 x i64> %__a to <4 x i64> + %1 = bitcast <4 x i64> %__b to <4 x i64> + %2 = icmp sge <4 x i64> %0, %1 + %3 = bitcast i8 %__u to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> + %4 = and <4 x i1> %2, %extract.i + %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + %6 = bitcast <8 x i1> %5 to i8 + ret i8 %6 +} + +define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { +; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem: +; VLX: # %bb.0: # %entry +; VLX-NEXT: kmovd %edi, %k1 +; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1} +; VLX-NEXT: kmovd %k0, %eax +; VLX-NEXT: # kill: def %al killed %al killed %eax +; VLX-NEXT: vzeroupper +; VLX-NEXT: retq +; +; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem: +; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, 
%ymm0 +; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 +; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k1 +; NoVLX-NEXT: kshiftrw $15, %k1, %k1 +; NoVLX-NEXT: kshiftlw $13, %k0, %k2 +; NoVLX-NEXT: kshiftrw $15, %k2, %k2 +; NoVLX-NEXT: kshiftlw $15, %k0, %k3 +; NoVLX-NEXT: kshiftrw $15, %k3, %k3 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: kmovw %k3, %ecx +; NoVLX-NEXT: vmovd %ecx, %xmm1 +; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: kmovw %k2, %eax +; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpextrb $4, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k0 +; NoVLX-NEXT: vpextrb $0, %xmm0, %eax +; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: vpextrb $12, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -34869,36 +34569,33 @@ define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw 
%k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -34950,36 +34647,33 @@ define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al 
killed %eax ; NoVLX-NEXT: vzeroupper @@ -35015,36 +34709,33 @@ define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__ ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -35075,36 +34766,33 @@ define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64> ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; 
NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -35154,36 +34842,33 @@ define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; 
NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -35236,36 +34921,33 @@ define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem(i8 zeroext %__u, < ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -35301,36 +34983,33 @@ define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %_ ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: 
vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -35382,36 +35061,33 @@ define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, 
%k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -41579,39 +41255,35 @@ define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw 
$12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -41637,39 +41309,35 @@ define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -41715,39 +41383,35 @@ define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i6 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; 
NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -41795,39 +41459,35 @@ define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -41859,39 +41519,35 @@ define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -41939,39 +41595,35 @@ define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: 
vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -42003,39 +41655,35 @@ define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__ ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: 
vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -42061,39 +41709,35 @@ define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64> ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} 
{z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -42139,39 +41783,35 @@ define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -42219,39 +41859,35 @@ define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem(i8 zeroext %__u, < ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, 
%k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -42283,39 +41919,35 @@ define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %_ ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; 
NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -42363,39 +41995,35 @@ define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d 
%zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -45983,23 +45611,21 @@ define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -46025,23 +45651,21 @@ define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -46079,23 +45703,21 @@ define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i6 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; 
NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -46135,23 +45757,21 @@ define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -46183,23 +45803,21 @@ define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; 
NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -46239,23 +45857,21 @@ define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -46287,23 +45903,21 @@ define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__ ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -46329,23 +45943,21 @@ define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64> ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: 
korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -46383,23 +45995,21 @@ define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -46439,23 +46049,21 @@ define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem(i8 zeroext %__u, < ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; 
NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -46487,23 +46095,21 @@ define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %_ ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -46543,23 +46149,21 @@ define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -47261,36 +46865,33 @@ define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = 
[0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -47321,36 +46922,33 @@ define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; 
NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -47401,36 +46999,33 @@ define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i6 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -47483,36 +47078,33 @@ define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, 
%zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -47549,36 +47141,33 @@ define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: 
kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -47631,36 +47220,33 @@ define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; 
NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -47697,36 +47283,33 @@ define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__ ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -47757,36 +47340,33 @@ define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64> ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} 
{z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -47837,36 +47417,33 @@ define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; 
NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -47919,36 +47496,33 @@ define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem(i8 zeroext %__u, < ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -47985,36 +47559,33 @@ define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %_ ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; 
NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -48067,36 +47638,33 @@ define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, 
%zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -49961,39 +49529,35 @@ define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, 
%k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -50016,39 +49580,35 @@ define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -50073,39 +49633,35 @@ define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, float* %_ ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q 
%zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -50137,36 +49693,33 @@ define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask(i4 zeroext %__u, <2 x i6 ; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; 
NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -50201,36 +49754,33 @@ define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem(i4 zeroext %__u, <2 ; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -50267,36 +49817,33 @@ define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, < ; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: 
vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -50328,39 +49875,35 @@ define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__ ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: 
kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -50383,39 +49926,35 @@ define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64> ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, 
%zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -50440,39 +49979,35 @@ define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, float* ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -50504,36 +50039,33 @@ define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask(i4 zeroext %__u, <2 x ; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, 
%k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -50568,36 +50100,33 @@ define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem(i4 zeroext %__u, < ; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, 
%k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -50634,36 +50163,33 @@ define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, ; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax 
killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -54198,23 +53724,21 @@ define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -54237,23 +53761,21 @@ define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -54278,23 +53800,21 @@ define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, double* % ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: 
vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -54326,20 +53846,19 @@ define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask(i2 zeroext %__u, <2 x i6 ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -54374,20 +53893,19 @@ define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem(i2 zeroext %__u, <2 ; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -54424,20 +53942,19 @@ define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b(i2 zeroext %__u, < ; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: 
kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -54469,23 +53986,21 @@ define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__ ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -54508,23 +54023,21 @@ define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64> ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, 
%k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -54549,23 +54062,21 @@ define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, double* ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax -; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -54597,20 +54108,19 @@ define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask(i2 zeroext %__u, <2 x ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -54645,20 +54155,19 @@ define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem(i2 zeroext %__u, < ; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, 
%zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -54695,20 +54204,19 @@ define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b(i2 zeroext %__u, ; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -55344,36 +54852,33 @@ define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) ; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; 
NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -55401,36 +54906,33 @@ define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* ; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -55460,36 +54962,33 @@ define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, 
double* % ; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -55526,36 +55025,33 @@ define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask(i4 zeroext %__u, <4 x i6 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, 
%zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -55592,36 +55088,33 @@ define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem(i4 zeroext %__u, <4 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: 
kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -55660,36 +55153,33 @@ define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, < ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] -; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -55723,36 +55213,33 @@ define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__ ; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: 
vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -55780,36 +55267,33 @@ define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64> ; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, 
%k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -55839,36 +55323,33 @@ define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, double* ; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -55905,36 +55386,33 @@ define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask(i4 zeroext %__u, <4 x ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, 
%xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -55971,36 +55449,33 @@ define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem(i4 zeroext %__u, < ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, 
%zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -56039,36 +55514,33 @@ define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k1 -; NoVLX-NEXT: kshiftlw $1, %k1, %k1 -; NoVLX-NEXT: korw %k0, %k1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 -; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; NoVLX-NEXT: kxorw %k0, %k0, %k2 +; NoVLX-NEXT: kshiftrw $1, %k2, %k2 +; NoVLX-NEXT: kshiftlw $1, %k2, %k2 +; NoVLX-NEXT: korw %k1, %k2, %k1 +; NoVLX-NEXT: kshiftrw $1, %k1, %k2 +; NoVLX-NEXT: kxorw %k0, %k2, %k0 +; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kxorw %k1, %k0, %k0 +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: vpextrb $8, %xmm0, %eax +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $13, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] -; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 -; 
NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %eax, %k2 +; NoVLX-NEXT: kxorw %k2, %k1, %k1 +; NoVLX-NEXT: kshiftlw $15, %k1, %k1 +; NoVLX-NEXT: kshiftrw $12, %k1, %k1 +; NoVLX-NEXT: kxorw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper
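
The updated NoVLX CHECK lines above all follow the same mask-register pattern: kshiftrw by the element index, kxorw with the new bit, kshiftlw $15, kshiftrw by (15 - index), then a final kxorw with the original mask. Below is a minimal scalar sketch (not the compiler code itself) that models what that instruction sequence computes on a 16-bit mask, assuming the mask is represented as a plain uint16_t; the helper name insertMaskBit and the sample values are hypothetical.

```cpp
#include <cstdint>
#include <cstdio>

// Scalar model of the kshiftrw/kxorw/kshiftlw/kshiftrw/kxorw sequence:
// insert the low bit of Elt into bit position IdxVal of Vec.
static uint16_t insertMaskBit(uint16_t Vec, uint16_t Elt, unsigned IdxVal) {
  uint16_t Merged = static_cast<uint16_t>(Vec >> IdxVal);  // old bit now at position 0
  Merged ^= static_cast<uint16_t>(Elt & 1);                // xor with the new bit
  Merged = static_cast<uint16_t>(Merged << 15);            // isolate that single bit in the MSB
  Merged = static_cast<uint16_t>(Merged >> (15 - IdxVal)); // place it at IdxVal, zeros elsewhere
  return static_cast<uint16_t>(Merged ^ Vec);              // old bit cancels out, new bit remains
}

int main() {
  // Inserting 0 at index 1 of an all-ones mask clears bit 1 (mirrors the
  // kshiftrw $1 ... kshiftlw $15 ... kshiftrw $14 sequence in the CHECK lines).
  std::printf("%#06x\n", static_cast<unsigned>(insertMaskBit(0xFFFF, 0, 1))); // 0xfffd
  return 0;
}
```

The sketch relies on the shifts filling vacated positions with zeros, which is why the isolate-and-place step leaves every bit other than IdxVal clear before the final xor.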