From 1a36c64204cdf0a4d48ca3fe4563ceaf55130f31 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 28 Mar 2017 15:50:23 +0000 Subject: [PATCH] [X86][SSE] Refactored shuffle BLEND combining to make future 16i16 support easier. NFCI. Call the matchVectorShuffleAsBlend test as early as possible. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298925 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 67 +++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 753b30aee9e..cb38a85b83b 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -27140,44 +27140,43 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, } // Attempt to combine to X86ISD::BLENDI. + // TODO - add 16i16 support (requires lane duplication). if (NumMaskElts <= 8 && ((Subtarget.hasSSE41() && MaskVT.is128BitVector()) || (Subtarget.hasAVX() && MaskVT.is256BitVector()))) { - // Determine a type compatible with X86ISD::BLENDI. - // TODO - add 16i16 support (requires lane duplication). - MVT BlendVT = MaskVT; - if (Subtarget.hasAVX2()) { - if (BlendVT == MVT::v4i64) - BlendVT = MVT::v8i32; - else if (BlendVT == MVT::v2i64) - BlendVT = MVT::v4i32; - } else { - if (BlendVT == MVT::v2i64 || BlendVT == MVT::v4i32) - BlendVT = MVT::v8i16; - else if (BlendVT == MVT::v4i64) - BlendVT = MVT::v4f64; - else if (BlendVT == MVT::v8i32) - BlendVT = MVT::v8f32; - } - - if (NumMaskElts <= BlendVT.getVectorNumElements()) { - uint64_t BlendMask = 0; - bool ForceV1Zero = false, ForceV2Zero = false; - SmallVector TargetMask(Mask.begin(), Mask.end()); - if (matchVectorShuffleAsBlend(V1, V2, TargetMask, ForceV1Zero, - ForceV2Zero, BlendMask)) { - if (NumMaskElts < BlendVT.getVectorNumElements()) { - int Scale = BlendVT.getVectorNumElements() / NumMaskElts; - BlendMask = - scaleVectorShuffleBlendMask(BlendMask, NumMaskElts, Scale); - } + uint64_t BlendMask = 0; + bool ForceV1Zero = false, ForceV2Zero = false; + SmallVector TargetMask(Mask.begin(), Mask.end()); + if (matchVectorShuffleAsBlend(V1, V2, TargetMask, ForceV1Zero, ForceV2Zero, + BlendMask)) { + // Determine a type compatible with X86ISD::BLENDI. + ShuffleVT = MaskVT; + if (Subtarget.hasAVX2()) { + if (ShuffleVT == MVT::v4i64) + ShuffleVT = MVT::v8i32; + else if (ShuffleVT == MVT::v2i64) + ShuffleVT = MVT::v4i32; + } else { + if (ShuffleVT == MVT::v2i64 || ShuffleVT == MVT::v4i32) + ShuffleVT = MVT::v8i16; + else if (ShuffleVT == MVT::v4i64) + ShuffleVT = MVT::v4f64; + else if (ShuffleVT == MVT::v8i32) + ShuffleVT = MVT::v8f32; + } - V1 = ForceV1Zero ? getZeroVector(BlendVT, Subtarget, DAG, DL) : V1; - V2 = ForceV2Zero ? getZeroVector(BlendVT, Subtarget, DAG, DL) : V2; - PermuteImm = (unsigned)BlendMask; - Shuffle = X86ISD::BLENDI; - ShuffleVT = BlendVT; - return true; + V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1; + V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2; + + if (!ShuffleVT.isFloatingPoint()) { + int Scale = EltSizeInBits / ShuffleVT.getScalarSizeInBits(); + BlendMask = scaleVectorShuffleBlendMask(BlendMask, NumMaskElts, Scale); + ShuffleVT = MVT::getIntegerVT(EltSizeInBits / Scale); + ShuffleVT = MVT::getVectorVT(ShuffleVT, NumMaskElts * Scale); } + + PermuteImm = (unsigned)BlendMask; + Shuffle = X86ISD::BLENDI; + return true; } } -- 2.11.0