From 3589550b3ef0f25f3383a63dd7071861c401de4c Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 29 Sep 2014 01:32:54 +0000 Subject: [PATCH] [x86] Refactor all of the VSELECT-as-blend lowering code to avoid domain crossing and generally work more like the blend emission code in the new vector shuffle lowering. My goal is to have the new vector shuffle lowering just produce VSELECT nodes that are either matched here to BLENDI or are legal and matched in the .td files to specific blend instructions. That seems much cleaner as there are other ways to produce a VSELECT anyways. =] No *observable* functionality changed yet, mostly because this code appears to be near-dead. The behavior of this lowering routine did change though. This code being mostly dead and untestable will change with my next commit which will also point some new tests at it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218588 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 75 +++++++++++++++++++++++++++++--------- 1 file changed, 57 insertions(+), 18 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ed542560742..552d420b805 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -11841,41 +11841,80 @@ static SDValue lowerVSELECTtoBLENDI(SDValue Op, const X86Subtarget *Subtarget, SDValue Cond = Op.getOperand(0); SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); - SDLoc dl(Op); + SDLoc DL(Op); MVT VT = Op.getSimpleValueType(); MVT EltVT = VT.getVectorElementType(); - unsigned NumElems = VT.getVectorNumElements(); // There is no blend with immediate in AVX-512. if (VT.is512BitVector()) return SDValue(); - if (!Subtarget->hasSSE41() || EltVT == MVT::i8) + // No blend instruction before SSE4.1. + if (!Subtarget->hasSSE41()) return SDValue(); - if (!Subtarget->hasInt256() && VT == MVT::v16i16) + // There is no byte-blend immediate controlled instruction. + if (EltVT == MVT::i8) return SDValue(); if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) return SDValue(); - // Check the mask for BLEND and build the value. - unsigned MaskValue = 0; - if (!BUILD_VECTORtoBlendMask(cast(Cond), MaskValue)) - return SDValue(); + auto *CondBV = cast(Cond); - // Convert i32 vectors to floating point if it is not AVX2. - // AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors. + unsigned BlendMask = 0; MVT BlendVT = VT; - if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) { - BlendVT = MVT::getVectorVT(MVT::getFloatingPointVT(EltVT.getSizeInBits()), - NumElems); - LHS = DAG.getNode(ISD::BITCAST, dl, VT, LHS); - RHS = DAG.getNode(ISD::BITCAST, dl, VT, RHS); + if (VT == MVT::v16i16) { + // v16i16 blends are completely special. We can only do them when we have + // a repeated blend across the two 128-bit halves and we have AVX2. + if (!Subtarget->hasAVX2()) + return SDValue(); + + for (int i = 0; i < 8; ++i) { + SDValue Lo = CondBV->getOperand(i); + SDValue Hi = CondBV->getOperand(i + 8); + bool IsLoZero = X86::isZeroNode(Lo); + bool IsHiZero = X86::isZeroNode(Hi); + if (Lo->getOpcode() != ISD::UNDEF && Hi->getOpcode() != ISD::UNDEF && + IsLoZero != IsHiZero) + // Asymmetric blends, bail. + return SDValue(); + BlendMask |= (unsigned)(IsLoZero || IsHiZero) << i; + } + } else { + // Everything else uses a generic blend mask computation with a custom type. + if (VT.isInteger()) { + if (VT.is256BitVector()) { + // The 256-bit integer blend instructions are only available on AVX2. + if (!Subtarget->hasAVX2()) + return SDValue(); + + // We do the blend on v8i32 for 256-bit integer types. + BlendVT = MVT::v8i32; + } else { + // For 128-bit vectors we do the blend on v8i16 types. + BlendVT = MVT::v8i16; + } + } + assert(BlendVT.getVectorNumElements() <= 8 && + "Cannot blend more than 8 elements with an immediate!"); + // Scale the blend mask based on the number of elements in the selected + // blend type. + int Scale = BlendVT.getVectorNumElements() / VT.getVectorNumElements(); + for (int i = 0, e = CondBV->getNumOperands(); i < e; ++i) { + SDValue CondElement = CondBV->getOperand(i); + if (CondElement->getOpcode() != ISD::UNDEF && + X86::isZeroNode(CondElement)) + for (int j = 0; j < Scale; ++j) + BlendMask |= 1u << (i * Scale + j); + } } - SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, LHS, RHS, - DAG.getConstant(MaskValue, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Ret); + LHS = DAG.getNode(ISD::BITCAST, DL, BlendVT, LHS); + RHS = DAG.getNode(ISD::BITCAST, DL, BlendVT, RHS); + + return DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getNode(X86ISD::BLENDI, DL, BlendVT, LHS, RHS, + DAG.getConstant(BlendMask, MVT::i8))); } SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { -- 2.11.0