From 9571ea66208c00e179fca478ddff78e49f122668 Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Wed, 18 Feb 2015 09:43:40 +0000 Subject: [PATCH] Fixes two issues in SimplifyDemandedBits of sext_in_reg: 1) We should not try to simplify if the sext has multiple uses 2) There is no need to simplify if the source value is already sign-extended. Patch by Gil Rapaport Differential Revision: http://reviews.llvm.org/D6949 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@229659 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 29 ++++++++++++++++++----------- test/CodeGen/X86/vector-blend.ll | 4 ++-- test/CodeGen/X86/vselect-avx.ll | 20 ++++++++------------ 3 files changed, 28 insertions(+), 25 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index a1f425eeac1..f2c5a63354e 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -793,19 +793,26 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1); // If we only care about the highest bit, don't bother shifting right. - if (MsbMask == DemandedMask) { + if (MsbMask == NewMask) { unsigned ShAmt = ExVT.getScalarType().getSizeInBits(); SDValue InOp = Op.getOperand(0); - - // Compute the correct shift amount type, which must be getShiftAmountTy - // for scalar types after legalization. 
- EVT ShiftAmtTy = Op.getValueType(); - if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) - ShiftAmtTy = getShiftAmountTy(ShiftAmtTy); - - SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, - Op.getValueType(), InOp, ShiftAmt)); + unsigned VTBits = Op->getValueType(0).getScalarType().getSizeInBits(); + bool AlreadySignExtended = + TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1; + // However if the input is already sign extended we expect the sign + // extension to be dropped altogether later and do not simplify. + if (!AlreadySignExtended) { + // Compute the correct shift amount type, which must be getShiftAmountTy + // for scalar types after legalization. + EVT ShiftAmtTy = Op.getValueType(); + if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) + ShiftAmtTy = getShiftAmountTy(ShiftAmtTy); + + SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, + Op.getValueType(), InOp, + ShiftAmt)); + } } // Sign extension. 
Compute the demanded bits in the result that are not diff --git a/test/CodeGen/X86/vector-blend.ll b/test/CodeGen/X86/vector-blend.ll index 0a7114b5510..18203de7294 100644 --- a/test/CodeGen/X86/vector-blend.ll +++ b/test/CodeGen/X86/vector-blend.ll @@ -419,8 +419,8 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) { ; ; SSE41-LABEL: vsel_i648: ; SSE41: # BB#0: # %entry -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7] -; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7] +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm4[1] +; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm6[1] ; SSE41-NEXT: movaps %xmm5, %xmm1 ; SSE41-NEXT: movaps %xmm7, %xmm3 ; SSE41-NEXT: retq diff --git a/test/CodeGen/X86/vselect-avx.ll b/test/CodeGen/X86/vselect-avx.ll index 0c0f4bbf992..26b00dbd85f 100644 --- a/test/CodeGen/X86/vselect-avx.ll +++ b/test/CodeGen/X86/vselect-avx.ll @@ -59,19 +59,15 @@ bb: ; ; -; Note: For now, hard code ORIG_MASK and SHRUNK_MASK registers, because we -; cannot express that ORIG_MASK must not be equal to ORIG_MASK. Otherwise, -; even a faulty pattern would pass! -; ; CHECK-LABEL: test3: -; Compute the original mask. -; CHECK: vpcmpeqd {{%xmm[0-9]+}}, {{%xmm[0-9]+}}, [[ORIG_MASK:%xmm0]] -; Shrink the bit of the mask. -; CHECK-NEXT: vpslld $31, [[ORIG_MASK]], [[SHRUNK_MASK:%xmm3]] -; Use the shrunk mask in the blend. -; CHECK-NEXT: vblendvps [[SHRUNK_MASK]], %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}} -; Use the original mask in the and. -; CHECK-NEXT: vpand LCPI2_2(%rip), [[ORIG_MASK]], {{%xmm[0-9]+}} +; Compute the mask. +; CHECK: vpcmpeqd {{%xmm[0-9]+}}, {{%xmm[0-9]+}}, [[MASK:%xmm[0-9]+]] +; Do not shrink the bit of the mask. +; CHECK-NOT: vpslld $31, [[MASK]], {{%xmm[0-9]+}} +; Use the mask in the blend. +; CHECK-NEXT: vblendvps [[MASK]], %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}} +; Use the mask in the and. 
+; CHECK-NEXT: vpand LCPI2_2(%rip), [[MASK]], {{%xmm[0-9]+}} ; CHECK: retq define void @test3(<4 x i32> %induction30, <4 x i16>* %tmp16, <4 x i16>* %tmp17, <4 x i16> %tmp3, <4 x i16> %tmp12) { %tmp6 = srem <4 x i32> %induction30, -- 2.11.0