From 477d32c48ae3fcf618e72e286804c1b2b5bbecbe Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 23 Oct 2018 11:33:38 +0000 Subject: [PATCH] [X86][SSE] Update raw mask shuffle decoders to handle UNDEF mask elts Matches the approach taken in the constant pool shuffle decoders, and uses an UndefElts mask instead of uint64_t(-1) raw mask values, which doesn't work safely for i32/i64 shuffle mask sizes (as the -1 value is legal). This allows us to remove the constant pool shuffle decoders from most of the getTargetShuffleMask variable shuffle cases (X86ISD::VPERMV3 will be handled in a future commit). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@345018 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/Utils/X86ShuffleDecode.cpp | 45 ++++++++++++++++------- lib/Target/X86/Utils/X86ShuffleDecode.h | 13 ++++--- lib/Target/X86/X86ISelLowering.cpp | 61 ++++++++++--------------------- 3 files changed, 59 insertions(+), 60 deletions(-) diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index fe567f4cece..bed940d0d0e 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -304,12 +304,12 @@ void DecodeVPERM2X128Mask(unsigned NumElts, unsigned Imm, } } -void DecodePSHUFBMask(ArrayRef RawMask, +void DecodePSHUFBMask(ArrayRef RawMask, const APInt &UndefElts, SmallVectorImpl &ShuffleMask) { for (int i = 0, e = RawMask.size(); i < e; ++i) { uint64_t M = RawMask[i]; - if (M == (uint64_t)SM_SentinelUndef) { - ShuffleMask.push_back(M); + if (UndefElts[i]) { + ShuffleMask.push_back(SM_SentinelUndef); continue; } // For 256/512-bit vectors the base of the shuffle is the 128-bit @@ -336,7 +336,7 @@ void DecodeBLENDMask(unsigned NumElts, unsigned Imm, } } -void DecodeVPPERMMask(ArrayRef RawMask, +void DecodeVPPERMMask(ArrayRef RawMask, const APInt &UndefElts, SmallVectorImpl &ShuffleMask) { assert(RawMask.size() == 16 && "Illegal VPPERM shuffle mask size"); @@ -354,12 +354,12 @@ void DecodeVPPERMMask(ArrayRef RawMask, // 6 - Most significant bit of source byte replicated in all bit positions. // 7 - Invert most significant bit of source byte and replicate in all bit positions. for (int i = 0, e = RawMask.size(); i < e; ++i) { - uint64_t M = RawMask[i]; - if (M == (uint64_t)SM_SentinelUndef) { - ShuffleMask.push_back(M); + if (UndefElts[i]) { + ShuffleMask.push_back(SM_SentinelUndef); continue; } + uint64_t M = RawMask[i]; uint64_t PermuteOp = (M >> 5) & 0x7; if (PermuteOp == 4) { ShuffleMask.push_back(SM_SentinelZero); @@ -490,7 +490,7 @@ void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx, } void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits, - ArrayRef RawMask, + ArrayRef RawMask, const APInt &UndefElts, SmallVectorImpl &ShuffleMask) { unsigned VecSize = NumElts * ScalarBits; unsigned NumLanes = VecSize / 128; @@ -500,6 +500,10 @@ void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits, assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size"); for (unsigned i = 0, e = RawMask.size(); i < e; ++i) { + if (UndefElts[i]) { + ShuffleMask.push_back(SM_SentinelUndef); + continue; + } uint64_t M = RawMask[i]; M = (ScalarBits == 64 ? ((M >> 1) & 0x1) : (M & 0x3)); unsigned LaneOffset = i & ~(NumEltsPerLane - 1); @@ -508,7 +512,7 @@ void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits, } void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z, - ArrayRef RawMask, + ArrayRef RawMask, const APInt &UndefElts, SmallVectorImpl &ShuffleMask) { unsigned VecSize = NumElts * ScalarBits; unsigned NumLanes = VecSize / 128; @@ -518,6 +522,11 @@ void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z, assert((NumElts == RawMask.size()) && "Unexpected mask size"); for (unsigned i = 0, e = RawMask.size(); i < e; ++i) { + if (UndefElts[i]) { + ShuffleMask.push_back(SM_SentinelUndef); + continue; + } + // VPERMIL2 Operation. // Bits[3] - Match Bit. // Bits[2:1] - (Per Lane) PD Shuffle Mask. @@ -548,19 +557,29 @@ void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z, } } -void DecodeVPERMVMask(ArrayRef RawMask, +void DecodeVPERMVMask(ArrayRef RawMask, const APInt &UndefElts, SmallVectorImpl &ShuffleMask) { uint64_t EltMaskSize = RawMask.size() - 1; - for (auto M : RawMask) { + for (int i = 0, e = RawMask.size(); i != e; ++i) { + if (UndefElts[i]) { + ShuffleMask.push_back(SM_SentinelUndef); + continue; + } + uint64_t M = RawMask[i]; M &= EltMaskSize; ShuffleMask.push_back((int)M); } } -void DecodeVPERMV3Mask(ArrayRef RawMask, +void DecodeVPERMV3Mask(ArrayRef RawMask, const APInt &UndefElts, SmallVectorImpl &ShuffleMask) { uint64_t EltMaskSize = (RawMask.size() * 2) - 1; - for (auto M : RawMask) { + for (int i = 0, e = RawMask.size(); i != e; ++i) { + if (UndefElts[i]) { + ShuffleMask.push_back(SM_SentinelUndef); + continue; + } + uint64_t M = RawMask[i]; M &= EltMaskSize; ShuffleMask.push_back((int)M); } diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 6d13bd58a12..85cde14a324 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -15,6 +15,7 @@ #ifndef LLVM_LIB_TARGET_X86_UTILS_X86SHUFFLEDECODE_H #define LLVM_LIB_TARGET_X86_UTILS_X86SHUFFLEDECODE_H +#include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallVector.h" //===----------------------------------------------------------------------===// @@ -108,7 +109,7 @@ void DecodeSubVectorBroadcast(unsigned DstNumElts, unsigned SrcNumElts, /// Decode a PSHUFB mask from a raw array of constants such as from /// BUILD_VECTOR. -void DecodePSHUFBMask(ArrayRef RawMask, +void DecodePSHUFBMask(ArrayRef RawMask, const APInt &UndefElts, SmallVectorImpl &ShuffleMask); /// Decode a BLEND immediate mask into a shuffle mask. @@ -131,7 +132,7 @@ void DecodeVPERMMask(unsigned NumElts, unsigned Imm, /// BUILD_VECTOR. /// This can only basic masks (permutes + zeros), not any of the other /// operations that VPPERM can perform. -void DecodeVPPERMMask(ArrayRef RawMask, +void DecodeVPPERMMask(ArrayRef RawMask, const APInt &UndefElts, SmallVectorImpl &ShuffleMask); /// Decode a zero extension instruction as a shuffle mask. @@ -156,20 +157,20 @@ void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx, /// Decode a VPERMILPD/VPERMILPS variable mask from a raw array of constants. void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits, - ArrayRef RawMask, + ArrayRef RawMask, const APInt &UndefElts, SmallVectorImpl &ShuffleMask); /// Decode a VPERMIL2PD/VPERMIL2PS variable mask from a raw array of constants. void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z, - ArrayRef RawMask, + ArrayRef RawMask, const APInt &UndefElts, SmallVectorImpl &ShuffleMask); /// Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants. -void DecodeVPERMVMask(ArrayRef RawMask, +void DecodeVPERMVMask(ArrayRef RawMask, const APInt &UndefElts, SmallVectorImpl &ShuffleMask); /// Decode a VPERMT2 W/D/Q/PS/PD mask from a raw array of constants. -void DecodeVPERMV3Mask(ArrayRef RawMask, +void DecodeVPERMV3Mask(ArrayRef RawMask, const APInt &UndefElts, SmallVectorImpl &ShuffleMask); } // llvm namespace diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 8ba6c9ee018..cc867070398 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5825,14 +5825,12 @@ static bool isConstantSplat(SDValue Op, APInt &SplatVal) { static bool getTargetShuffleMaskIndices(SDValue MaskNode, unsigned MaskEltSizeInBits, - SmallVectorImpl &RawMask) { - APInt UndefElts; - SmallVector EltBits; - + SmallVectorImpl &RawMask, + APInt &UndefElts) { // Extract the raw target constant bits. - // FIXME: We currently don't support UNDEF bits or mask entries. + SmallVector EltBits; if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts, - EltBits, /* AllowWholeUndefs */ false, + EltBits, /* AllowWholeUndefs */ true, /* AllowPartialUndefs */ false)) return false; @@ -5873,6 +5871,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, SmallVectorImpl &Mask, bool &IsUnary) { unsigned NumElems = VT.getVectorNumElements(); unsigned MaskEltSize = VT.getScalarSizeInBits(); + SmallVector RawMask; + APInt RawUndefs; SDValue ImmN; assert(Mask.empty() && "getTargetShuffleMask expects an empty Mask vector"); @@ -6025,13 +6025,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); IsUnary = true; SDValue MaskNode = N->getOperand(1); - SmallVector RawMask; - if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) { - DecodeVPERMILPMask(NumElems, MaskEltSize, RawMask, Mask); - break; - } - if (auto *C = getTargetConstantFromNode(MaskNode)) { - DecodeVPERMILPMask(C, MaskEltSize, VT.getSizeInBits(), Mask); + if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask, + RawUndefs)) { + DecodeVPERMILPMask(NumElems, MaskEltSize, RawMask, RawUndefs, Mask); break; } return false; @@ -6042,13 +6038,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); IsUnary = true; SDValue MaskNode = N->getOperand(1); - SmallVector RawMask; - if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) { - DecodePSHUFBMask(RawMask, Mask); - break; - } - if (auto *C = getTargetConstantFromNode(MaskNode)) { - DecodePSHUFBMask(C, VT.getSizeInBits(), Mask); + if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask, RawUndefs)) { + DecodePSHUFBMask(RawMask, RawUndefs, Mask); break; } return false; @@ -6104,13 +6095,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, SDValue CtrlNode = N->getOperand(3); if (ConstantSDNode *CtrlOp = dyn_cast(CtrlNode)) { unsigned CtrlImm = CtrlOp->getZExtValue(); - SmallVector RawMask; - if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) { - DecodeVPERMIL2PMask(NumElems, MaskEltSize, CtrlImm, RawMask, Mask); - break; - } - if (auto *C = getTargetConstantFromNode(MaskNode)) { - DecodeVPERMIL2PMask(C, CtrlImm, MaskEltSize, VT.getSizeInBits(), Mask); + if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask, + RawUndefs)) { + DecodeVPERMIL2PMask(NumElems, MaskEltSize, CtrlImm, RawMask, RawUndefs, + Mask); break; } } @@ -6121,13 +6109,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); SDValue MaskNode = N->getOperand(2); - SmallVector RawMask; - if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) { - DecodeVPPERMMask(RawMask, Mask); - break; - } - if (auto *C = getTargetConstantFromNode(MaskNode)) { - DecodeVPPERMMask(C, VT.getSizeInBits(), Mask); + if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask, RawUndefs)) { + DecodeVPPERMMask(RawMask, RawUndefs, Mask); break; } return false; @@ -6138,13 +6121,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, // Unlike most shuffle nodes, VPERMV's mask operand is operand 0. Ops.push_back(N->getOperand(1)); SDValue MaskNode = N->getOperand(0); - SmallVector RawMask; - if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) { - DecodeVPERMVMask(RawMask, Mask); - break; - } - if (auto *C = getTargetConstantFromNode(MaskNode)) { - DecodeVPERMVMask(C, MaskEltSize, VT.getSizeInBits(), Mask); + if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask, + RawUndefs)) { + DecodeVPERMVMask(RawMask, RawUndefs, Mask); break; } return false; -- 2.11.0