From: Simon Pilgrim
Date: Tue, 25 Jun 2019 10:51:15 +0000 (+0000)
Subject: [TargetLowering] SimplifyDemandedBits - legal checks for SIGN/ZERO_EXTEND -> ZERO...
X-Git-Tag: android-x86-9.0-r1~1397
X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=691701d2f8aea47c36cad4769b906bd06c48c86b;p=android-x86%2Fexternal-llvm.git

[TargetLowering] SimplifyDemandedBits - legal checks for SIGN/ZERO_EXTEND -> ZERO/ANY_EXTEND

As part of the fix for rL364264 + rL364272 - limit the *_EXTEND conversion to
!TLO.LegalOperations || isOperationLegal cases.

We'll improve X86 legality in future commits.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364290 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 64fc3b894a5..2608d06ff90 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1390,8 +1390,11 @@ bool TargetLowering::SimplifyDemandedBits(
           TLO.DAG.getDataLayout().isLittleEndian())
         return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
 
-      if (!IsVecInReg)
-        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src));
+      if (!IsVecInReg) {
+        unsigned Opc = ISD::ANY_EXTEND;
+        if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
+          return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
+      }
     }
 
     APInt InDemandedBits = DemandedBits.trunc(InBits);
@@ -1414,8 +1417,11 @@ bool TargetLowering::SimplifyDemandedBits(
 
     // If none of the top bits are demanded, convert this into an any_extend.
     // TODO: Add SIGN_EXTEND_VECTOR_INREG - ANY_EXTEND_VECTOR_INREG fold.
-    if (DemandedBits.getActiveBits() <= InBits && !IsVecInReg)
-      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src));
+    if (DemandedBits.getActiveBits() <= InBits && !IsVecInReg) {
+      unsigned Opc = ISD::ANY_EXTEND;
+      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
+        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
+    }
 
     APInt InDemandedBits = DemandedBits.trunc(InBits);
     APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
@@ -1435,8 +1441,11 @@ bool TargetLowering::SimplifyDemandedBits(
 
     // If the sign bit is known zero, convert this to a zero extend.
    // TODO: Add SIGN_EXTEND_VECTOR_INREG - ZERO_EXTEND_VECTOR_INREG fold.
-    if (Known.isNonNegative() && !IsVecInReg)
-      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Src));
+    if (Known.isNonNegative() && !IsVecInReg) {
+      unsigned Opc = ISD::ZERO_EXTEND;
+      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
+        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
+    }
     break;
   }
   case ISD::ANY_EXTEND: {
diff --git a/test/CodeGen/X86/avx512-ext.ll b/test/CodeGen/X86/avx512-ext.ll
index 072e3c8bdae..d2c0c7d66da 100644
--- a/test/CodeGen/X86/avx512-ext.ll
+++ b/test/CodeGen/X86/avx512-ext.ll
@@ -475,7 +475,7 @@ define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind re
 define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
 ; KNL-LABEL: zext_8x8mem_to_8x32:
 ; KNL: # %bb.0:
-; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
 ; KNL-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
@@ -492,7 +492,7 @@ define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind re
 ;
 ; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x32:
 ; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
 ; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
@@ -506,7 +506,7 @@ define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind re
 define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
 ; KNL-LABEL: sext_8x8mem_to_8x32:
 ; KNL: # %bb.0:
-; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
 ; KNL-NEXT: vpmovsxbd (%rdi), %ymm0
@@ -523,7 +523,7 @@ define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind re
 ;
 ; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x32:
 ; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1
 ; AVX512DQNOBW-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z}
@@ -537,7 +537,7 @@ define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind re
 define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
 ; KNL-LABEL: zext_16x8mem_to_16x32:
 ; KNL: # %bb.0:
-; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 =
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 ; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero @@ -552,7 +552,7 @@ define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwi ; ; AVX512DQNOBW-LABEL: zext_16x8mem_to_16x32: ; AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1 ; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero @@ -566,7 +566,7 @@ define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwi define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { ; KNL-LABEL: sext_16x8mem_to_16x32: ; KNL: # %bb.0: -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 ; KNL-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} @@ -581,7 +581,7 @@ define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwi ; ; AVX512DQNOBW-LABEL: sext_16x8mem_to_16x32: ; AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0 ; 
AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1 ; AVX512DQNOBW-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} @@ -595,7 +595,7 @@ define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwi define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_16x8_to_16x32_mask: ; KNL: # %bb.0: -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 ; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero @@ -610,7 +610,7 @@ define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounw ; ; AVX512DQNOBW-LABEL: zext_16x8_to_16x32_mask: ; AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1 ; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero @@ -623,7 +623,7 @@ define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounw define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { ; KNL-LABEL: sext_16x8_to_16x32_mask: ; KNL: # %bb.0: -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} @@ -638,7 +638,7 @@ define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounw ; ; AVX512DQNOBW-LABEL: sext_16x8_to_16x32_mask: ; AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm1 = 
xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1 ; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} @@ -777,7 +777,7 @@ define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone { define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_8x8mem_to_8x64: ; KNL: # %bb.0: -; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 ; KNL-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero @@ -792,7 +792,7 @@ define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind re ; ; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x64: ; AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 ; AVX512DQNOBW-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero @@ -806,7 +806,7 @@ define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind re define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { ; KNL-LABEL: sext_8x8mem_to_8x64mask: ; KNL: # %bb.0: -; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 ; KNL-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} @@ -821,7 +821,7 @@ define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwin ; ; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x64mask: ; AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 ; AVX512DQNOBW-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} @@ -902,7 +902,7 @@ define <4 x i32> 
@sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone { define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_8x16mem_to_8x32: ; KNL: # %bb.0: -; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 ; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero @@ -919,7 +919,7 @@ define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind ; ; AVX512DQNOBW-LABEL: zext_8x16mem_to_8x32: ; AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero @@ -933,7 +933,7 @@ define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { ; KNL-LABEL: sext_8x16mem_to_8x32mask: ; KNL: # %bb.0: -; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 ; KNL-NEXT: vpmovsxwd (%rdi), %ymm0 @@ -950,7 +950,7 @@ define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounw ; ; AVX512DQNOBW-LABEL: sext_8x16mem_to_8x32mask: ; AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 ; AVX512DQNOBW-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} @@ -974,7 +974,7 @@ define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone { define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_8x16_to_8x32mask: ; KNL: # %bb.0: -; KNL-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero +; KNL-NEXT: vpmovsxwq %xmm1, %zmm1 ; KNL-NEXT: vpsllq $63, %zmm1, %zmm1 ; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 ; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero @@ -991,7 +991,7 @@ define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind ; ; AVX512DQNOBW-LABEL: zext_8x16_to_8x32mask: ; AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1 ; 
AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero @@ -1013,7 +1013,7 @@ define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone { define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_16x16mem_to_16x32: ; KNL: # %bb.0: -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero @@ -1028,7 +1028,7 @@ define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) noun ; ; AVX512DQNOBW-LABEL: zext_16x16mem_to_16x32: ; AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero @@ -1042,7 +1042,7 @@ define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) noun define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone { ; KNL-LABEL: sext_16x16mem_to_16x32mask: ; KNL: # %bb.0: -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 ; KNL-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} @@ -1057,7 +1057,7 @@ define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) ; ; AVX512DQNOBW-LABEL: sext_16x16mem_to_16x32mask: ; AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512DQNOBW-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k1 ; AVX512DQNOBW-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} @@ -1080,7 +1080,7 @@ define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone { define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_16x16_to_16x32mask: ; KNL: # %bb.0: -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 ; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero @@ -1095,7 +1095,7 @@ define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) noun ; ; AVX512DQNOBW-LABEL: zext_16x16_to_16x32mask: ; AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; AVX512DQNOBW-NEXT: vpmovsxbd %xmm1, %zmm1 ; AVX512DQNOBW-NEXT: vpslld $31, %zmm1, %zmm1 ; AVX512DQNOBW-NEXT: vpmovd2m %zmm1, %k1 ; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero @@ -1227,7 +1227,7 @@ define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone { define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_8x16mem_to_8x64: ; KNL: # %bb.0: -; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 ; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero @@ -1242,7 +1242,7 @@ define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind ; ; AVX512DQNOBW-LABEL: zext_8x16mem_to_8x64: ; 
AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 ; AVX512DQNOBW-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero @@ -1256,7 +1256,7 @@ define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { ; KNL-LABEL: sext_8x16mem_to_8x64mask: ; KNL: # %bb.0: -; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 ; KNL-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} @@ -1271,7 +1271,7 @@ define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounw ; ; AVX512DQNOBW-LABEL: sext_8x16mem_to_8x64mask: ; AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 ; AVX512DQNOBW-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} @@ -1295,7 +1295,7 @@ define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone { define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_8x16_to_8x64mask: ; KNL: # %bb.0: -; KNL-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero +; KNL-NEXT: vpmovsxwq %xmm1, %zmm1 ; KNL-NEXT: vpsllq $63, %zmm1, %zmm1 ; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 ; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero @@ -1310,7 +1310,7 @@ define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind ; ; AVX512DQNOBW-LABEL: zext_8x16_to_8x64mask: ; AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1 ; AVX512DQNOBW-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero @@ -1472,7 +1472,7 @@ define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_8x32mem_to_8x64: ; KNL: # %bb.0: -; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 ; KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero @@ -1487,7 +1487,7 @@ define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind ; ; AVX512DQNOBW-LABEL: zext_8x32mem_to_8x64: ; AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 ; AVX512DQNOBW-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero @@ -1501,7 +1501,7 @@ define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone { ; KNL-LABEL: sext_8x32mem_to_8x64mask: ; KNL: # %bb.0: -; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 ; KNL-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} @@ -1516,7 +1516,7 @@ define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounw ; ; AVX512DQNOBW-LABEL: sext_8x32mem_to_8x64mask: ; AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k1 ; AVX512DQNOBW-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} @@ -1549,7 +1549,7 @@ define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone { define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_8x32_to_8x64mask: ; KNL: # %bb.0: -; KNL-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero +; KNL-NEXT: vpmovsxwq %xmm1, %zmm1 ; KNL-NEXT: vpsllq $63, %zmm1, %zmm1 ; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 ; KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero @@ -1564,7 +1564,7 @@ define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind ; ; AVX512DQNOBW-LABEL: zext_8x32_to_8x64mask: ; AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; AVX512DQNOBW-NEXT: vpmovsxwd %xmm1, %ymm1 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm1, %ymm1 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm1, %k1 ; AVX512DQNOBW-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero @@ -1705,7 
+1705,7 @@ define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) { define i8 @trunc_8i16_to_8i1(<8 x i16> %a) { ; KNL-LABEL: trunc_8i16_to_8i1: ; KNL: # %bb.0: -; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax @@ -1723,7 +1723,7 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %a) { ; ; AVX512DQNOBW-LABEL: trunc_8i16_to_8i1: ; AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0 ; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k0 ; AVX512DQNOBW-NEXT: kmovw %k0, %eax diff --git a/test/CodeGen/X86/avx512-fma.ll b/test/CodeGen/X86/avx512-fma.ll index e9a70f3bfe2..07eeeb4c662 100644 --- a/test/CodeGen/X86/avx512-fma.ll +++ b/test/CodeGen/X86/avx512-fma.ll @@ -122,7 +122,7 @@ define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind { define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) { ; KNL-LABEL: test_x86_fmadd132_ps: ; KNL: ## %bb.0: -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1 ; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1 @@ -145,7 +145,7 @@ define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <1 define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) { ; KNL-LABEL: test_x86_fmadd231_ps: ; KNL: ## %bb.0: -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1 ; KNL-NEXT: vfmadd231ps {{.*#+}} zmm1 = (zmm0 * mem) + zmm1 @@ -170,7 +170,7 @@ define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <1 define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) { ; KNL-LABEL: test_x86_fmadd213_ps: ; KNL: ## %bb.0: -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm2 = 
xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1 ; KNL-NEXT: vfmadd213ps {{.*#+}} zmm1 = (zmm0 * zmm1) + mem diff --git a/test/CodeGen/X86/avx512-insert-extract.ll b/test/CodeGen/X86/avx512-insert-extract.ll index 8bdd7dc2c1d..02c67ba079c 100644 --- a/test/CodeGen/X86/avx512-insert-extract.ll +++ b/test/CodeGen/X86/avx512-insert-extract.ll @@ -1710,11 +1710,11 @@ define i32 @test_insertelement_variable_v32i1(<32 x i8> %a, i8 %b, i32 %index) { ; KNL-NEXT: testb %dil, %dil ; KNL-NEXT: vmovdqa %ymm0, (%rsp) ; KNL-NEXT: setne (%rsp,%rsi) -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd (%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax @@ -1776,21 +1776,21 @@ define i64 @test_insertelement_variable_v64i1(<64 x i8> %a, i8 %b, i32 %index) { ; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp) ; KNL-NEXT: vmovdqa %ymm0, (%rsp) ; KNL-NEXT: setne (%rsp,%rsi) -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd (%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %ecx ; KNL-NEXT: shll $16, %ecx ; KNL-NEXT: orl %eax, %ecx -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %edx -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax @@ -1956,21 +1956,21 @@ define i96 @test_insertelement_variable_v96i1(<96 x i8> %a, i8 %b, i32 %index) { ; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp) ; KNL-NEXT: vmovdqa %ymm2, (%rsp) ; KNL-NEXT: setne (%rsp,%rax) -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd (%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %ecx ; KNL-NEXT: shll $16, %ecx ; KNL-NEXT: orl %eax, %ecx -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %edx -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax @@ -1978,21 +1978,21 @@ define i96 
@test_insertelement_variable_v96i1(<96 x i8> %a, i8 %b, i32 %index) { ; KNL-NEXT: orl %edx, %eax ; KNL-NEXT: shlq $32, %rax ; KNL-NEXT: orq %rcx, %rax -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %esi ; KNL-NEXT: shll $16, %esi ; KNL-NEXT: orl %ecx, %esi -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %edx @@ -2168,21 +2168,21 @@ define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index ; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp) ; KNL-NEXT: vmovdqa %ymm0, (%rsp) ; KNL-NEXT: setne (%rsp,%rsi) -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd (%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; 
KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %ecx ; KNL-NEXT: shll $16, %ecx ; KNL-NEXT: orl %eax, %ecx -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %edx -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax @@ -2190,21 +2190,21 @@ define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index ; KNL-NEXT: orl %edx, %eax ; KNL-NEXT: shlq $32, %rax ; KNL-NEXT: orq %rcx, %rax -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %esi ; KNL-NEXT: shll $16, %esi ; KNL-NEXT: orl %ecx, %esi -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %edx diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll index 96bbadba695..02cf7544e0c 100644 --- a/test/CodeGen/X86/avx512-mask-op.ll +++ b/test/CodeGen/X86/avx512-mask-op.ll @@ -613,7 +613,7 @@ false: define void @test7(<8 x i1> %mask) { ; KNL-LABEL: test7: ; KNL: ## %bb.0: ## %allocas -; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax @@ -640,7 +640,7 @@ define void @test7(<8 x i1> %mask) { ; ; AVX512DQ-LABEL: test7: ; AVX512DQ: ## %bb.0: ## %allocas -; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax @@ -763,10 +763,10 @@ define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) { ; KNL-NEXT: cmpl %esi, %edi ; KNL-NEXT: jg LBB18_1 ; KNL-NEXT: ## %bb.2: -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd %xmm1, %zmm0 ; KNL-NEXT: jmp LBB18_3 ; KNL-NEXT: LBB18_1: -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: LBB18_3: ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 @@ -810,10 +810,10 @@ define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) { ; AVX512DQ-NEXT: cmpl %esi, %edi ; AVX512DQ-NEXT: jg LBB18_1 ; AVX512DQ-NEXT: ## %bb.2: -; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0 ; AVX512DQ-NEXT: jmp 
LBB18_3 ; AVX512DQ-NEXT: LBB18_1: -; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQ-NEXT: LBB18_3: ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 @@ -1666,7 +1666,7 @@ define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) { define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) { ; KNL-LABEL: store_v8i1: ; KNL: ## %bb.0: -; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 ; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax @@ -1694,7 +1694,7 @@ define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) { ; ; AVX512DQ-LABEL: store_v8i1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0 ; AVX512DQ-NEXT: knotb %k0, %k0 @@ -1718,7 +1718,7 @@ define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) { define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) { ; KNL-LABEL: store_v16i1: ; KNL: ## %bb.0: -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, (%rdi) @@ -1744,7 +1744,7 @@ define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) { ; ; AVX512DQ-LABEL: store_v16i1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 ; AVX512DQ-NEXT: knotw %k0, %k0 @@ -2483,7 +2483,7 @@ define <64 x i8> @load_64i1(<64 x i1>* %a) { define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) { ; KNL-LABEL: store_8i1: ; KNL: ## %bb.0: -; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 ; 
KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax @@ -2509,7 +2509,7 @@ define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) { ; ; AVX512DQ-LABEL: store_8i1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) @@ -2530,7 +2530,7 @@ define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) { define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) { ; KNL-LABEL: store_8i1_1: ; KNL: ## %bb.0: -; KNL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax @@ -2556,7 +2556,7 @@ define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) { ; ; AVX512DQ-LABEL: store_8i1_1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) @@ -2578,7 +2578,7 @@ define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) { define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) { ; KNL-LABEL: store_16i1: ; KNL: ## %bb.0: -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, (%rdi) @@ -2602,7 +2602,7 @@ define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) { ; ; AVX512DQ-LABEL: store_16i1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 ; AVX512DQ-NEXT: kmovw %k0, (%rdi) @@ -2623,11 +2623,11 @@ define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) { define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) { ; KNL-LABEL: store_32i1: ; KNL: ## %bb.0: -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm1 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd %xmm0, %zmm1 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 ; KNL-NEXT: kmovw %k1, 2(%rdi) @@ -2653,11 +2653,11 @@ define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) { ; ; AVX512DQ-LABEL: store_32i1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1 ; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1 ; AVX512DQ-NEXT: vpmovd2m %zmm1, %k0 ; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1 ; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) @@ -2680,10 +2680,10 @@ define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) { define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) { ; KNL-LABEL: store_32i1_1: ; KNL: ## %bb.0: -; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; KNL-NEXT: vpmovsxwd %ymm1, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 ; KNL-NEXT: kmovw %k1, 2(%rdi) @@ -2709,10 +2709,10 @@ define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) { ; ; AVX512DQ-LABEL: store_32i1_1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 -; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1 ; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) @@ -2738,16 +2738,16 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) { ; ; KNL-LABEL: store_64i1: ; KNL: ## %bb.0: -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd %xmm1, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd %xmm2, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k2 -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero,xmm3[8],zero,zero,zero,xmm3[9],zero,zero,zero,xmm3[10],zero,zero,zero,xmm3[11],zero,zero,zero,xmm3[12],zero,zero,zero,xmm3[13],zero,zero,zero,xmm3[14],zero,zero,zero,xmm3[15],zero,zero,zero +; KNL-NEXT: vpmovsxbd %xmm3, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k3 ; KNL-NEXT: kmovw %k3, 6(%rdi) @@ -2775,16 +2775,16 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) { ; ; AVX512DQ-LABEL: store_64i1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 
+; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 -; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1 -; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k2 -; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero,xmm3[8],zero,zero,zero,xmm3[9],zero,zero,zero,xmm3[10],zero,zero,zero,xmm3[11],zero,zero,zero,xmm3[12],zero,zero,zero,xmm3[13],zero,zero,zero,xmm3[14],zero,zero,zero,xmm3[15],zero,zero,zero +; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovd2m %zmm0, %k3 ; AVX512DQ-NEXT: kmovw %k3, 6(%rdi) diff --git a/test/CodeGen/X86/avx512-masked-memop-64-32.ll b/test/CodeGen/X86/avx512-masked-memop-64-32.ll index 89dfc69fcd5..f199cb097aa 100644 --- a/test/CodeGen/X86/avx512-masked-memop-64-32.ll +++ b/test/CodeGen/X86/avx512-masked-memop-64-32.ll @@ -111,7 +111,7 @@ declare <16 x %mystruct*> @llvm.masked.load.v16p0mystruct.p0v16p0mystruct(<16 x define <16 x %mystruct*> @test24(<16 x i1> %mask, <16 x %mystruct*>* %addr) { ; AVX512F-LABEL: test24: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 ; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} @@ -134,7 +134,7 @@ define <16 x %mystruct*> @test24(<16 x i1> %mask, <16 x %mystruct*>* %addr) { define void @test_store_16i64(<16 x i64>* %ptrs, <16 x i1> %mask, <16 x i64> %src0) { ; AVX512F-LABEL: test_store_16i64: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512F-NEXT: vpslld 
$31, %zmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 ; AVX512F-NEXT: vmovdqu64 %zmm1, (%rdi) {%k1} @@ -160,7 +160,7 @@ declare void @llvm.masked.store.v16i64.p0v16i64(<16 x i64> %src0, <16 x i64>* %p define void @test_store_16f64(<16 x double>* %ptrs, <16 x i1> %mask, <16 x double> %src0) { ; AVX512F-LABEL: test_store_16f64: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 ; AVX512F-NEXT: vmovupd %zmm1, (%rdi) {%k1} @@ -186,7 +186,7 @@ declare void @llvm.masked.store.v16f64.p0v16f64(<16 x double> %src0, <16 x doubl define <16 x i64> @test_load_16i64(<16 x i64>* %ptrs, <16 x i1> %mask, <16 x i64> %src0) { ; AVX512F-LABEL: test_load_16i64: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 ; AVX512F-NEXT: vpblendmq (%rdi), %zmm1, %zmm0 {%k1} @@ -210,7 +210,7 @@ declare <16 x i64> @llvm.masked.load.v16i64.p0v16i64(<16 x i64>* %ptrs, i32, <16 define <16 x double> @test_load_16f64(<16 x double>* %ptrs, <16 x i1> %mask, <16 x double> %src0) { ; AVX512F-LABEL: test_load_16f64: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 ; AVX512F-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k1} @@ -235,10 +235,10 @@ define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32 ; AVX512F-LABEL: test_load_32f64: ; AVX512F: ## %bb.0: ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm5 -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero,xmm5[4],zero,zero,zero,xmm5[5],zero,zero,zero,xmm5[6],zero,zero,zero,xmm5[7],zero,zero,zero,xmm5[8],zero,zero,zero,xmm5[9],zero,zero,zero,xmm5[10],zero,zero,zero,xmm5[11],zero,zero,zero,xmm5[12],zero,zero,zero,xmm5[13],zero,zero,zero,xmm5[14],zero,zero,zero,xmm5[15],zero,zero,zero +; AVX512F-NEXT: vpmovsxbd %xmm5, %zmm5 ; AVX512F-NEXT: vpslld $31, %zmm5, %zmm5 ; AVX512F-NEXT: vptestmd %zmm5, %zmm5, %k1 -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2 ; AVX512F-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k2} diff --git a/test/CodeGen/X86/avx512-vec-cmp.ll b/test/CodeGen/X86/avx512-vec-cmp.ll index a823e1b275b..1799ebd53fa 100644 --- a/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/test/CodeGen/X86/avx512-vec-cmp.ll @@ -850,8 +850,7 @@ define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) noun define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x i1> %mask_in) nounwind { ; KNL-LABEL: test43: ; KNL: ## %bb.0: -; KNL-NEXT: vpmovzxwq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x34,0xd2] -; KNL-NEXT: ## zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero +; KNL-NEXT: vpmovsxwq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x24,0xd2] ; KNL-NEXT: vpsllq $63, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xf2,0x3f] ; KNL-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01] ; KNL-NEXT: vptestmq %zmm2, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x27,0xca] diff --git a/test/CodeGen/X86/masked_compressstore.ll b/test/CodeGen/X86/masked_compressstore.ll index 0806ccd86ef..2b3ee63fc23 100644 --- a/test/CodeGen/X86/masked_compressstore.ll +++ b/test/CodeGen/X86/masked_compressstore.ll @@ -194,7 +194,7 @@ define void @compressstore_v8f64_v8i1(double* %base, <8 x double> %V, <8 x i1> % ; ; AVX512F-LABEL: compressstore_v8f64_v8i1: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero +; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1 ; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1 ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1 ; AVX512F-NEXT: vcompresspd %zmm0, (%rdi) {%k1} @@ -203,7 +203,7 @@ define void @compressstore_v8f64_v8i1(double* %base, <8 x double> %V, <8 x i1> % ; ; AVX512VLDQ-LABEL: compressstore_v8f64_v8i1: ; AVX512VLDQ: ## %bb.0: -; AVX512VLDQ-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; AVX512VLDQ-NEXT: vpmovsxwd %xmm1, %ymm1 ; AVX512VLDQ-NEXT: vpslld $31, %ymm1, %ymm1 ; AVX512VLDQ-NEXT: vpmovd2m %ymm1, %k1 ; AVX512VLDQ-NEXT: vcompresspd %zmm0, (%rdi) {%k1} @@ -579,7 +579,7 @@ define void @compressstore_v16f64_v16i1(double* %base, <16 x double> %V, <16 x i ; ; AVX512F-LABEL: compressstore_v16f64_v16i1: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; AVX512F-NEXT: vpmovsxbd %xmm2, 
%zmm2 ; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2 ; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k1 ; AVX512F-NEXT: kshiftrw $8, %k1, %k2 @@ -607,7 +607,7 @@ define void @compressstore_v16f64_v16i1(double* %base, <16 x double> %V, <16 x i ; ; AVX512VLDQ-LABEL: compressstore_v16f64_v16i1: ; AVX512VLDQ: ## %bb.0: -; AVX512VLDQ-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2 ; AVX512VLDQ-NEXT: vpslld $31, %zmm2, %zmm2 ; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k1 ; AVX512VLDQ-NEXT: kmovb %k1, %eax @@ -1094,7 +1094,7 @@ define void @compressstore_v8f32_v8i1(float* %base, <8 x float> %V, <8 x i1> %ma ; AVX512F-LABEL: compressstore_v8f32_v8i1: ; AVX512F: ## %bb.0: ; AVX512F-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero +; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1 ; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1 ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1 ; AVX512F-NEXT: vcompressps %zmm0, (%rdi) {%k1} @@ -1103,7 +1103,7 @@ define void @compressstore_v8f32_v8i1(float* %base, <8 x float> %V, <8 x i1> %ma ; ; AVX512VLDQ-LABEL: compressstore_v8f32_v8i1: ; AVX512VLDQ: ## %bb.0: -; AVX512VLDQ-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; AVX512VLDQ-NEXT: vpmovsxwd %xmm1, %ymm1 ; AVX512VLDQ-NEXT: vpslld $31, %ymm1, %ymm1 ; AVX512VLDQ-NEXT: vpmovd2m %ymm1, %k1 ; AVX512VLDQ-NEXT: vcompressps %ymm0, (%rdi) {%k1} @@ -2997,7 +2997,7 @@ define void @compressstore_v8i64_v8i1(i64* %base, <8 x i64> %V, <8 x i1> %mask) ; ; AVX512F-LABEL: compressstore_v8i64_v8i1: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero +; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1 ; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1 ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1 ; AVX512F-NEXT: vpcompressq %zmm0, (%rdi) {%k1} @@ -3006,7 +3006,7 @@ define void @compressstore_v8i64_v8i1(i64* %base, <8 x i64> %V, <8 x i1> %mask) ; ; AVX512VLDQ-LABEL: compressstore_v8i64_v8i1: ; AVX512VLDQ: ## %bb.0: -; AVX512VLDQ-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; AVX512VLDQ-NEXT: vpmovsxwd %xmm1, %ymm1 ; AVX512VLDQ-NEXT: vpslld $31, %ymm1, %ymm1 ; AVX512VLDQ-NEXT: vpmovd2m %ymm1, %k1 ; AVX512VLDQ-NEXT: vpcompressq %zmm0, (%rdi) {%k1} diff --git a/test/CodeGen/X86/masked_expandload.ll b/test/CodeGen/X86/masked_expandload.ll index 351e0409f3f..62b012ca2ab 100644 --- a/test/CodeGen/X86/masked_expandload.ll +++ b/test/CodeGen/X86/masked_expandload.ll @@ -466,7 +466,7 @@ define <8 x double> @expandload_v8f64_v8i1(double* %base, <8 x double> %src0, <8 ; ; AVX512F-LABEL: expandload_v8f64_v8i1: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vpmovzxwq {{.*#+}} zmm1 = 
xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero +; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1 ; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1 ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1 ; AVX512F-NEXT: vexpandpd (%rdi), %zmm0 {%k1} @@ -474,7 +474,7 @@ define <8 x double> @expandload_v8f64_v8i1(double* %base, <8 x double> %src0, <8 ; ; AVX512VLDQ-LABEL: expandload_v8f64_v8i1: ; AVX512VLDQ: ## %bb.0: -; AVX512VLDQ-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; AVX512VLDQ-NEXT: vpmovsxwd %xmm1, %ymm1 ; AVX512VLDQ-NEXT: vpslld $31, %ymm1, %ymm1 ; AVX512VLDQ-NEXT: vpmovd2m %ymm1, %k1 ; AVX512VLDQ-NEXT: vexpandpd (%rdi), %zmm0 {%k1} diff --git a/test/CodeGen/X86/masked_gather_scatter.ll b/test/CodeGen/X86/masked_gather_scatter.ll index 2c4294e07db..610fca0949a 100644 --- a/test/CodeGen/X86/masked_gather_scatter.ll +++ b/test/CodeGen/X86/masked_gather_scatter.ll @@ -1850,7 +1850,7 @@ define <16 x float*> @test31(<16 x float**> %ptrs) { define <16 x i32> @test_gather_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> %src0) { ; KNL_64-LABEL: test_gather_16i32: ; KNL_64: # %bb.0: -; KNL_64-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2 ; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2 ; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 ; KNL_64-NEXT: vextracti64x4 $1, %zmm3, %ymm2 @@ -1862,7 +1862,7 @@ define <16 x i32> @test_gather_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i ; ; KNL_32-LABEL: test_gather_16i32: ; KNL_32: # %bb.0: -; KNL_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1} @@ -1871,7 +1871,7 @@ define <16 x i32> @test_gather_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i ; ; SKX-LABEL: test_gather_16i32: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 ; SKX-NEXT: vpslld $31, %zmm2, %zmm2 ; SKX-NEXT: vpmovd2m %zmm2, %k1 ; SKX-NEXT: vextracti64x4 $1, %zmm3, %ymm2 @@ -1883,7 +1883,7 @@ define <16 x i32> @test_gather_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i ; ; SKX_32-LABEL: test_gather_16i32: ; SKX_32: # %bb.0: -; SKX_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = 
xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 ; SKX_32-NEXT: vpmovd2m %zmm1, %k1 ; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1} @@ -1895,7 +1895,7 @@ define <16 x i32> @test_gather_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %src0) { ; KNL_64-LABEL: test_gather_16i64: ; KNL_64: # %bb.0: -; KNL_64-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2 ; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2 ; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 ; KNL_64-NEXT: kshiftrw $8, %k1, %k2 @@ -1914,7 +1914,7 @@ define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i ; KNL_32-NEXT: .cfi_def_cfa_register %ebp ; KNL_32-NEXT: andl $-64, %esp ; KNL_32-NEXT: subl $64, %esp -; KNL_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vmovdqa64 8(%ebp), %zmm1 @@ -1930,7 +1930,7 @@ define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i ; ; SKX-LABEL: test_gather_16i64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 ; SKX-NEXT: vpslld $31, %zmm2, %zmm2 ; SKX-NEXT: vpmovd2m %zmm2, %k1 ; SKX-NEXT: kshiftrw $8, %k1, %k2 @@ -1949,7 +1949,7 @@ define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i ; SKX_32-NEXT: .cfi_def_cfa_register %ebp ; SKX_32-NEXT: andl $-64, %esp ; SKX_32-NEXT: subl $64, %esp -; SKX_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 ; 
SKX_32-NEXT: vpmovd2m %zmm1, %k1 ; SKX_32-NEXT: vmovdqa64 8(%ebp), %zmm1 @@ -1969,7 +1969,7 @@ declare <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32, < define <16 x float> @test_gather_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0) { ; KNL_64-LABEL: test_gather_16f32: ; KNL_64: # %bb.0: -; KNL_64-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2 ; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2 ; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 ; KNL_64-NEXT: vextractf64x4 $1, %zmm3, %ymm2 @@ -1981,7 +1981,7 @@ define <16 x float> @test_gather_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 ; ; KNL_32-LABEL: test_gather_16f32: ; KNL_32: # %bb.0: -; KNL_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vgatherdps (,%zmm0), %zmm2 {%k1} @@ -1990,7 +1990,7 @@ define <16 x float> @test_gather_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 ; ; SKX-LABEL: test_gather_16f32: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 ; SKX-NEXT: vpslld $31, %zmm2, %zmm2 ; SKX-NEXT: vpmovd2m %zmm2, %k1 ; SKX-NEXT: vextractf64x4 $1, %zmm3, %ymm2 @@ -2002,7 +2002,7 @@ define <16 x float> @test_gather_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 ; ; SKX_32-LABEL: test_gather_16f32: ; SKX_32: # %bb.0: -; SKX_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 ; SKX_32-NEXT: vpmovd2m %zmm1, %k1 ; SKX_32-NEXT: vgatherdps (,%zmm0), %zmm2 {%k1} @@ -2014,7 +2014,7 @@ define <16 x float> @test_gather_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x double> %src0) { ; KNL_64-LABEL: test_gather_16f64: ; KNL_64: # %bb.0: -; KNL_64-NEXT: vpmovzxbd {{.*#+}} zmm2 = 
xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2 ; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2 ; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 ; KNL_64-NEXT: kshiftrw $8, %k1, %k2 @@ -2033,7 +2033,7 @@ define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, < ; KNL_32-NEXT: .cfi_def_cfa_register %ebp ; KNL_32-NEXT: andl $-64, %esp ; KNL_32-NEXT: subl $64, %esp -; KNL_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vmovapd 8(%ebp), %zmm1 @@ -2049,7 +2049,7 @@ define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, < ; ; SKX-LABEL: test_gather_16f64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 ; SKX-NEXT: vpslld $31, %zmm2, %zmm2 ; SKX-NEXT: vpmovd2m %zmm2, %k1 ; SKX-NEXT: kshiftrw $8, %k1, %k2 @@ -2068,7 +2068,7 @@ define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, < ; SKX_32-NEXT: .cfi_def_cfa_register %ebp ; SKX_32-NEXT: andl $-64, %esp ; SKX_32-NEXT: subl $64, %esp -; SKX_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 ; SKX_32-NEXT: vpmovd2m %zmm1, %k1 ; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1 @@ -2088,7 +2088,7 @@ declare <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, define void @test_scatter_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> %src0) { ; KNL_64-LABEL: test_scatter_16i32: ; KNL_64: # %bb.0: -; KNL_64-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2 ; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2 ; KNL_64-NEXT: 
vptestmd %zmm2, %zmm2, %k1 ; KNL_64-NEXT: kshiftrw $8, %k1, %k2 @@ -2100,7 +2100,7 @@ define void @test_scatter_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> % ; ; KNL_32-LABEL: test_scatter_16i32: ; KNL_32: # %bb.0: -; KNL_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vpscatterdd %zmm2, (,%zmm0) {%k1} @@ -2109,7 +2109,7 @@ define void @test_scatter_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> % ; ; SKX-LABEL: test_scatter_16i32: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 ; SKX-NEXT: vpslld $31, %zmm2, %zmm2 ; SKX-NEXT: vpmovd2m %zmm2, %k1 ; SKX-NEXT: kshiftrw $8, %k1, %k2 @@ -2121,7 +2121,7 @@ define void @test_scatter_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> % ; ; SKX_32-LABEL: test_scatter_16i32: ; SKX_32: # %bb.0: -; SKX_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 ; SKX_32-NEXT: vpmovd2m %zmm1, %k1 ; SKX_32-NEXT: vpscatterdd %zmm2, (,%zmm0) {%k1} @@ -2133,7 +2133,7 @@ define void @test_scatter_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> % define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %src0) { ; KNL_64-LABEL: test_scatter_16i64: ; KNL_64: # %bb.0: -; KNL_64-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2 ; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2 ; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 ; KNL_64-NEXT: kshiftrw $8, %k1, %k2 @@ -2151,7 +2151,7 @@ define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> % ; KNL_32-NEXT: .cfi_def_cfa_register %ebp ; KNL_32-NEXT: andl $-64, %esp ; KNL_32-NEXT: subl $64, %esp -; KNL_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = 
xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vmovdqa64 8(%ebp), %zmm1 @@ -2167,7 +2167,7 @@ define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> % ; ; SKX-LABEL: test_scatter_16i64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 ; SKX-NEXT: vpslld $31, %zmm2, %zmm2 ; SKX-NEXT: vpmovd2m %zmm2, %k1 ; SKX-NEXT: kshiftrw $8, %k1, %k2 @@ -2185,7 +2185,7 @@ define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> % ; SKX_32-NEXT: .cfi_def_cfa_register %ebp ; SKX_32-NEXT: andl $-64, %esp ; SKX_32-NEXT: subl $64, %esp -; SKX_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 ; SKX_32-NEXT: vpmovd2m %zmm1, %k1 ; SKX_32-NEXT: vmovdqa64 8(%ebp), %zmm1 @@ -2205,7 +2205,7 @@ declare void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> define void @test_scatter_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0) { ; KNL_64-LABEL: test_scatter_16f32: ; KNL_64: # %bb.0: -; KNL_64-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2 ; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2 ; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 ; KNL_64-NEXT: kshiftrw $8, %k1, %k2 @@ -2217,7 +2217,7 @@ define void @test_scatter_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x floa ; ; KNL_32-LABEL: test_scatter_16f32: ; KNL_32: # %bb.0: -; KNL_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 ; KNL_32-NEXT: 
vscatterdps %zmm2, (,%zmm0) {%k1} @@ -2226,7 +2226,7 @@ define void @test_scatter_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x floa ; ; SKX-LABEL: test_scatter_16f32: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 ; SKX-NEXT: vpslld $31, %zmm2, %zmm2 ; SKX-NEXT: vpmovd2m %zmm2, %k1 ; SKX-NEXT: kshiftrw $8, %k1, %k2 @@ -2238,7 +2238,7 @@ define void @test_scatter_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x floa ; ; SKX_32-LABEL: test_scatter_16f32: ; SKX_32: # %bb.0: -; SKX_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 ; SKX_32-NEXT: vpmovd2m %zmm1, %k1 ; SKX_32-NEXT: vscatterdps %zmm2, (,%zmm0) {%k1} @@ -2251,7 +2251,7 @@ declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %src0, <16 x floa define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x double> %src0) { ; KNL_64-LABEL: test_scatter_16f64: ; KNL_64: # %bb.0: -; KNL_64-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2 ; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2 ; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 ; KNL_64-NEXT: kshiftrw $8, %k1, %k2 @@ -2269,7 +2269,7 @@ define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x dou ; KNL_32-NEXT: .cfi_def_cfa_register %ebp ; KNL_32-NEXT: andl $-64, %esp ; KNL_32-NEXT: subl $64, %esp -; KNL_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vmovapd 8(%ebp), %zmm1 @@ -2285,7 +2285,7 @@ define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x dou ; ; SKX-LABEL: test_scatter_16f64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbd {{.*#+}} zmm2 = 
xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero +; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 ; SKX-NEXT: vpslld $31, %zmm2, %zmm2 ; SKX-NEXT: vpmovd2m %zmm2, %k1 ; SKX-NEXT: kshiftrw $8, %k1, %k2 @@ -2303,7 +2303,7 @@ define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x dou ; SKX_32-NEXT: .cfi_def_cfa_register %ebp ; SKX_32-NEXT: andl $-64, %esp ; SKX_32-NEXT: subl $64, %esp -; SKX_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero +; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 ; SKX_32-NEXT: vpmovd2m %zmm1, %k1 ; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1 diff --git a/test/CodeGen/X86/masked_load.ll b/test/CodeGen/X86/masked_load.ll index b5efae10536..81ebf448914 100644 --- a/test/CodeGen/X86/masked_load.ll +++ b/test/CodeGen/X86/masked_load.ll @@ -1256,7 +1256,7 @@ define <8 x float> @load_v8f32_v8i1_zero(<8 x i1> %mask, <8 x float>* %addr) { ; ; AVX512F-LABEL: load_v8f32_v8i1_zero: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0 ; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 ; AVX512F-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} @@ -1265,7 +1265,7 @@ define <8 x float> @load_v8f32_v8i1_zero(<8 x i1> %mask, <8 x float>* %addr) { ; ; AVX512VLDQ-LABEL: load_v8f32_v8i1_zero: ; AVX512VLDQ: ## %bb.0: -; AVX512VLDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512VLDQ-NEXT: vpmovsxwd %xmm0, %ymm0 ; AVX512VLDQ-NEXT: vpslld $31, %ymm0, %ymm0 ; AVX512VLDQ-NEXT: vpmovd2m %ymm0, %k1 ; AVX512VLDQ-NEXT: vmovaps (%rdi), %ymm0 {%k1} {z} @@ -2442,7 +2442,7 @@ define <8 x i32> @load_v8i32_v8i1(<8 x i1> %mask, <8 x i32>* %addr, <8 x i32> %d ; AVX512F-LABEL: load_v8i32_v8i1: ; AVX512F: ## %bb.0: ; AVX512F-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0 ; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 ; AVX512F-NEXT: vpblendmd (%rdi), %zmm1, %zmm0 {%k1} @@ -2451,7 +2451,7 @@ define <8 x i32> @load_v8i32_v8i1(<8 x i1> %mask, <8 x i32>* %addr, <8 x i32> %d ; ; AVX512VLDQ-LABEL: load_v8i32_v8i1: ; AVX512VLDQ: ## %bb.0: -; AVX512VLDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512VLDQ-NEXT: vpmovsxwd %xmm0, %ymm0 ; AVX512VLDQ-NEXT: vpslld $31, %ymm0, 
%ymm0
 ; AVX512VLDQ-NEXT: vpmovd2m %ymm0, %k1
 ; AVX512VLDQ-NEXT: vpblendmd (%rdi), %ymm1, %ymm0 {%k1}
@@ -2617,7 +2617,7 @@ define <8 x i32> @load_v8i32_v8i1_zero(<8 x i1> %mask, <8 x i32>* %addr) {
 ;
 ; AVX512F-LABEL: load_v8i32_v8i1_zero:
 ; AVX512F: ## %bb.0:
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
 ; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
 ; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
@@ -2626,7 +2626,7 @@ define <8 x i32> @load_v8i32_v8i1_zero(<8 x i1> %mask, <8 x i32>* %addr) {
 ;
 ; AVX512VLDQ-LABEL: load_v8i32_v8i1_zero:
 ; AVX512VLDQ: ## %bb.0:
-; AVX512VLDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX512VLDQ-NEXT: vpmovsxwd %xmm0, %ymm0
 ; AVX512VLDQ-NEXT: vpslld $31, %ymm0, %ymm0
 ; AVX512VLDQ-NEXT: vpmovd2m %ymm0, %k1
 ; AVX512VLDQ-NEXT: vmovdqu32 (%rdi), %ymm0 {%k1} {z}
diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll
index 1960e790925..2768bf0dc9d 100644
--- a/test/CodeGen/X86/pmul.ll
+++ b/test/CodeGen/X86/pmul.ll
@@ -1345,9 +1345,9 @@ define <8 x i64> @mul_v8i64_sext(<8 x i16> %val1, <8 x i32> %val2) {
 ; AVX2-NEXT: vpmovsxwq %xmm2, %ymm2
 ; AVX2-NEXT: vpmovsxwq %xmm0, %ymm0
 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
-; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
+; AVX2-NEXT: vpmovsxdq %xmm3, %ymm3
 ; AVX2-NEXT: vpmuldq %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
 ; AVX2-NEXT: vpmuldq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: vmovdqa %ymm2, %ymm1
 ; AVX2-NEXT: retq
diff --git a/test/CodeGen/X86/pr30284.ll b/test/CodeGen/X86/pr30284.ll
index 763bd1ba7b3..be1a8d44184 100644
--- a/test/CodeGen/X86/pr30284.ll
+++ b/test/CodeGen/X86/pr30284.ll
@@ -16,7 +16,7 @@ define void @undef_cond() {
 define void @f_f___un_3C_unf_3E_un_3C_unf_3E_(<16 x i1> %x) {
 ; CHECK-LABEL: f_f___un_3C_unf_3E_un_3C_unf_3E_:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; CHECK-NEXT: vpmovsxbd %xmm0, %zmm0
 ; CHECK-NEXT: vpslld $31, %zmm0, %zmm0
 ; CHECK-NEXT: vpmovd2m %zmm0, %k1
 ; CHECK-NEXT: vmovapd 0, %zmm0
diff --git a/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll b/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
index 3a5d073759e..7abb3a043a6 100644
--- a/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
+++ b/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
@@ -22,13 +22,13 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<8 x i32>* %a, <8
 ; AVX256VL-NEXT: vpmovdw %ymm2, %xmm2
 ; AVX256VL-NEXT: vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm1[2],xmm2[3],xmm1[4],xmm2[5,6,7]
 ; AVX256VL-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[6,7,12,13,4,5,8,9,6,7,14,15,14,15,0,1]
-; AVX256VL-NEXT: vpmovzxwd {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
+; AVX256VL-NEXT: vpmovsxwd %xmm3, %ymm3
 ; AVX256VL-NEXT: vpslld $31, %ymm3, %ymm3
 ; AVX256VL-NEXT: vptestmd %ymm3, %ymm3, %k1
 ; AVX256VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,1,3]
 ; AVX256VL-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[6,7,12,13,2,3,14,15,6,7,6,7,14,15,0,1]
 ; AVX256VL-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[3],xmm2[4],xmm1[5],xmm2[6,7]
-; AVX256VL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; AVX256VL-NEXT: vpmovsxwd %xmm1, %ymm1
 ; AVX256VL-NEXT: vpslld $31, %ymm1, %ymm1
 ; AVX256VL-NEXT: vptestmd %ymm1, %ymm1, %k0
 ; AVX256VL-NEXT: kunpckbw %k1, %k0, %k0
@@ -160,11 +160,11 @@ define <32 x i1> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0
 ; AVX256VL-NEXT: vpermq {{.*#+}} ymm2 = ymm2[1,1,2,1]
 ; AVX256VL-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255]
 ; AVX256VL-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
-; AVX256VL-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; AVX256VL-NEXT: vpmovsxwd %xmm1, %ymm2
 ; AVX256VL-NEXT: vpslld $31, %ymm2, %ymm2
 ; AVX256VL-NEXT: vptestmd %ymm2, %ymm2, %k1
 ; AVX256VL-NEXT: vextracti128 $1, %ymm1, %xmm1
-; AVX256VL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; AVX256VL-NEXT: vpmovsxwd %xmm1, %ymm1
 ; AVX256VL-NEXT: vpslld $31, %ymm1, %ymm1
 ; AVX256VL-NEXT: vptestmd %ymm1, %ymm1, %k0
 ; AVX256VL-NEXT: kunpckbw %k1, %k0, %k0
diff --git a/test/CodeGen/X86/setcc-lowering.ll b/test/CodeGen/X86/setcc-lowering.ll
index 1cf20bf62e4..aade54b0b02 100644
--- a/test/CodeGen/X86/setcc-lowering.ll
+++ b/test/CodeGen/X86/setcc-lowering.ll
@@ -65,7 +65,7 @@ define void @pr26232(i64 %a, <16 x i1> %b) {
 ; KNL-32-NEXT: pushl %esi
 ; KNL-32-NEXT: .cfi_def_cfa_offset 8
 ; KNL-32-NEXT: .cfi_offset %esi, -8
-; KNL-32-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; KNL-32-NEXT: vpmovsxbd %xmm0, %zmm0
 ; KNL-32-NEXT: vpslld $31, %zmm0, %zmm0
 ; KNL-32-NEXT: vptestmd %zmm0, %zmm0, %k0
 ; KNL-32-NEXT: movl {{[0-9]+}}(%esp), %eax
diff --git a/test/CodeGen/X86/vector-reduce-and-bool.ll b/test/CodeGen/X86/vector-reduce-and-bool.ll
index b7ebe04cefb..b8d57f6954d 100644
--- a/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -138,7 +138,7 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512F-LABEL: trunc_v8i16_v8i1:
 ; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
 ; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT: kmovw %k0, %eax
@@ -397,7 +397,7 @@ define i1 @trunc_v16i16_v16i1(<16 x i16>) {
 ;
 ; AVX512F-LABEL: trunc_v16i16_v16i1:
 ; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
 ; AVX512F-NEXT: kortestw %k0, %k0
@@ -461,7 +461,7 @@ define i1 @trunc_v32i8_v32i1(<32 x i8>) {
 ; AVX512F: # %bb.0:
 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
@@ -752,7 +752,7 @@ define i1 @trunc_v32i16_v32i1(<32 x i16>) {
 ; AVX512F-LABEL: trunc_v32i16_v32i1:
 ; AVX512F: # %bb.0:
 ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
@@ -872,7 +872,7 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) {
 ; AVX512F-NEXT: vpand %xmm2, %xmm3, %xmm2
 ; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
 ; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
diff --git a/test/CodeGen/X86/vector-reduce-or-bool.ll b/test/CodeGen/X86/vector-reduce-or-bool.ll
index d41403e6794..de5f52ffc92 100644
--- a/test/CodeGen/X86/vector-reduce-or-bool.ll
+++ b/test/CodeGen/X86/vector-reduce-or-bool.ll
@@ -132,7 +132,7 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512F-LABEL: trunc_v8i16_v8i1:
 ; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
 ; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT: kmovw %k0, %eax
@@ -388,7 +388,7 @@ define i1 @trunc_v16i16_v16i1(<16 x i16>) {
 ;
 ; AVX512F-LABEL: trunc_v16i16_v16i1:
 ; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
 ; AVX512F-NEXT: kortestw %k0, %k0
@@ -452,7 +452,7 @@ define i1 @trunc_v32i8_v32i1(<32 x i8>) {
 ; AVX512F: # %bb.0:
 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
@@ -743,7 +743,7 @@ define i1 @trunc_v32i16_v32i1(<32 x i16>) {
 ; AVX512F-LABEL: trunc_v32i16_v32i1:
 ; AVX512F: # %bb.0:
 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
@@ -863,7 +863,7 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) {
 ; AVX512F-NEXT: vpor %xmm2, %xmm3, %xmm2
 ; AVX512F-NEXT: vpor %xmm2, %xmm1, %xmm1
 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
diff --git a/test/CodeGen/X86/vector-reduce-xor-bool.ll b/test/CodeGen/X86/vector-reduce-xor-bool.ll
index 29d032b4b44..eb0ba63fc12 100644
--- a/test/CodeGen/X86/vector-reduce-xor-bool.ll
+++ b/test/CodeGen/X86/vector-reduce-xor-bool.ll
@@ -138,7 +138,7 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512F-LABEL: trunc_v8i16_v8i1:
 ; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
 ; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT: kmovw %k0, %eax
@@ -411,7 +411,7 @@ define i1 @trunc_v16i16_v16i1(<16 x i16>) {
 ;
 ; AVX512F-LABEL: trunc_v16i16_v16i1:
 ; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
 ; AVX512F-NEXT: kmovw %k0, %eax
@@ -493,7 +493,7 @@ define i1 @trunc_v32i8_v32i1(<32 x i8>) {
 ; AVX512F: # %bb.0:
 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; AVX512F-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
@@ -840,7 +840,7 @@ define i1 @trunc_v32i16_v32i1(<32 x i16>) {
 ; AVX512F-LABEL: trunc_v32i16_v32i1:
 ; AVX512F: # %bb.0:
 ; AVX512F-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
@@ -972,7 +972,7 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) {
 ; AVX512F-NEXT: vpxor %xmm2, %xmm3, %xmm2
 ; AVX512F-NEXT: vpxor %xmm2, %xmm1, %xmm1
 ; AVX512F-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
diff --git a/test/CodeGen/X86/vector-shuffle-512-v16.ll b/test/CodeGen/X86/vector-shuffle-512-v16.ll
index 62c06a8cd3b..eb096ff1de7 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v16.ll
@@ -593,7 +593,7 @@ define <16 x i32> @test_vshufi32x4_512(<16 x i32> %x, <16 x i32> %x1) nounwind {
 define <16 x float> @test_vshuff32x4_512_mask(<16 x float> %x, <16 x float> %x1, <16 x float> %y, <16 x i1> %mask) nounwind {
 ; AVX512F-LABEL: test_vshuff32x4_512_mask:
 ; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero,xmm3[8],zero,zero,zero,xmm3[9],zero,zero,zero,xmm3[10],zero,zero,zero,xmm3[11],zero,zero,zero,xmm3[12],zero,zero,zero,xmm3[13],zero,zero,zero,xmm3[14],zero,zero,zero,xmm3[15],zero,zero,zero
+; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
 ; AVX512F-NEXT: vpslld $31, %zmm3, %zmm3
 ; AVX512F-NEXT: vpmovd2m %zmm3, %k1
 ; AVX512F-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
@@ -615,7 +615,7 @@ define <16 x float> @test_vshuff32x4_512_mask(<16 x float> %x, <16 x float> %x1,
 define <16 x i32> @test_vshufi32x4_512_mask(<16 x i32> %x, <16 x i32> %x1, <16 x i32> %y, <16 x i1> %mask) nounwind {
 ; AVX512F-LABEL: test_vshufi32x4_512_mask:
 ; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero,xmm3[8],zero,zero,zero,xmm3[9],zero,zero,zero,xmm3[10],zero,zero,zero,xmm3[11],zero,zero,zero,xmm3[12],zero,zero,zero,xmm3[13],zero,zero,zero,xmm3[14],zero,zero,zero,xmm3[15],zero,zero,zero
+; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
 ; AVX512F-NEXT: vpslld $31, %zmm3, %zmm3
 ; AVX512F-NEXT: vpmovd2m %zmm3, %k1
 ; AVX512F-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
diff --git a/test/CodeGen/X86/vector-shuffle-512-v8.ll b/test/CodeGen/X86/vector-shuffle-512-v8.ll
index ff3a3106337..52b04fdc8dd 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -1849,7 +1849,7 @@ define <8 x double> @test_vshuff64x2_512(<8 x double> %x, <8 x double> %x1) noun
 define <8 x double> @test_vshuff64x2_512_maskz(<8 x double> %x, <8 x double> %x1, <8 x i1> %mask) nounwind {
 ; ALL-LABEL: test_vshuff64x2_512_maskz:
 ; ALL: # %bb.0:
-; ALL-NEXT: vpmovzxwq {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero
+; ALL-NEXT: vpmovsxwq %xmm2, %zmm2
 ; ALL-NEXT: vpsllq $63, %zmm2, %zmm2
 ; ALL-NEXT: vptestmq %zmm2, %zmm2, %k1
 ; ALL-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[2,3,0,1]
@@ -1862,7 +1862,7 @@ define <8 x double> @test_vshuff64x2_512_maskz(<8 x double> %x, <8 x double> %x1
 define <8 x i64> @test_vshufi64x2_512_mask(<8 x i64> %x, <8 x i64> %x1, <8 x i1> %mask) nounwind {
 ; ALL-LABEL: test_vshufi64x2_512_mask:
 ; ALL: # %bb.0:
-; ALL-NEXT: vpmovzxwq {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero
+; ALL-NEXT: vpmovsxwq %xmm2, %zmm2
 ; ALL-NEXT: vpsllq $63, %zmm2, %zmm2
 ; ALL-NEXT: vptestmq %zmm2, %zmm2, %k1
 ; ALL-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],zmm1[2,3,0,1]
@@ -1891,7 +1891,7 @@ define <8 x double> @test_vshuff64x2_512_mem(<8 x double> %x, <8 x double> *%ptr
 define <8 x double> @test_vshuff64x2_512_mem_mask(<8 x double> %x, <8 x double> *%ptr, <8 x i1> %mask) nounwind {
 ; AVX512F-LABEL: test_vshuff64x2_512_mem_mask:
 ; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
+; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1
 ; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
 ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
 ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm0[0,1,4,5],mem[2,3,0,1]
@@ -1899,7 +1899,7 @@ define <8 x double> @test_vshuff64x2_512_mem_mask(<8 x double> %x, <8 x double>
 ;
 ; AVX512F-32-LABEL: test_vshuff64x2_512_mem_mask:
 ; AVX512F-32: # %bb.0:
-; AVX512F-32-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
+; AVX512F-32-NEXT: vpmovsxwq %xmm1, %zmm1
 ; AVX512F-32-NEXT: vpsllq $63, %zmm1, %zmm1
 ; AVX512F-32-NEXT: vptestmq %zmm1, %zmm1, %k1
 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1914,7 +1914,7 @@ define <8 x double> @test_vshuff64x2_512_mem_mask(<8 x double> %x, <8 x double>
 define <8 x double> @test_vshuff64x2_512_mem_maskz(<8 x double> %x, <8 x double> *%ptr, <8 x i1> %mask) nounwind {
 ; AVX512F-LABEL: test_vshuff64x2_512_mem_maskz:
 ; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
+; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1
 ; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
 ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
 ; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],mem[2,3,0,1]
@@ -1922,7 +1922,7 @@ define <8 x double> @test_vshuff64x2_512_mem_maskz(<8 x double> %x, <8 x double>
 ;
 ; AVX512F-32-LABEL: test_vshuff64x2_512_mem_maskz:
 ; AVX512F-32: # %bb.0:
-; AVX512F-32-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
+; AVX512F-32-NEXT: vpmovsxwq %xmm1, %zmm1
 ; AVX512F-32-NEXT: vpsllq $63, %zmm1, %zmm1
 ; AVX512F-32-NEXT: vptestmq %zmm1, %zmm1, %k1
 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
diff --git a/test/CodeGen/X86/vector-shuffle-v1.ll b/test/CodeGen/X86/vector-shuffle-v1.ll
index 3897e55a1ad..d926e5427dd 100644
--- a/test/CodeGen/X86/vector-shuffle-v1.ll
+++ b/test/CodeGen/X86/vector-shuffle-v1.ll
@@ -213,11 +213,11 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<16 x i32> %a, <1
 define <32 x i1> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<32 x i1> %a) {
 ; AVX512F-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
 ; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1
 ; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1
 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2
 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
@@ -232,11 +232,11 @@ define <32 x i1> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0
 ;
 ; AVX512VL-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512VL-NEXT: vpmovsxbd %xmm0, %zmm1
 ; AVX512VL-NEXT: vpslld $31, %zmm1, %zmm1
 ; AVX512VL-NEXT: vptestmd %zmm1, %zmm1, %k1
 ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512VL-NEXT: vpmovsxbd %xmm0, %zmm0
 ; AVX512VL-NEXT: vpslld $31, %zmm0, %zmm0
 ; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k2
 ; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
@@ -749,7 +749,7 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8 %a) {
 define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_ones(<8 x i1> %a) {
 ; AVX512F-LABEL: shuf8i1_9_6_1_10_3_7_7_0_all_ones:
 ; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
 ; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
 ; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
@@ -764,7 +764,7 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_ones(<8 x i1> %a) {
 ;
 ; AVX512VL-LABEL: shuf8i1_9_6_1_10_3_7_7_0_all_ones:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX512VL-NEXT: vpmovsxwd %xmm0, %ymm0
 ; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
 ; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k1
 ; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0