From 65fafbb1090201a727513fe9af50e443cfa900da Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Mon, 31 Mar 2014 11:43:19 +0000 Subject: [PATCH] Look at shuffles of build_vectors in DAGCombiner::visitEXTRACT_VECTOR_ELT When the loop vectorizer vectorizes code that uses the loop induction variable, we often end up with IR like this: %b1 = insertelement <2 x i32> undef, i32 %v, i32 0 %b2 = shufflevector <2 x i32> %b1, <2 x i32> undef, <2 x i32> zeroinitializer %i = add <2 x i32> %b2, <i32 2, i32 3> If the add in this example is not legal (as is the case on PPC with VSX), it will be scalarized, and we'll end up with a number of extract_vector_elt nodes with the vector shuffle as the input operand, and that vector shuffle is fed by one or more build_vector nodes. By the time that vector operations are expanded, visitEXTRACT_VECTOR_ELT will not create new extract_vector_elt by looking through the vector shuffle (to make sure that no illegal operations are created), and so the extract_vector_elt -> vector shuffle -> build_vector is never simplified to an operand of the build vector. By looking at build_vectors through a shuffle we fix this particular situation, preventing a vector from being built, only to be deconstructed again (for the scalarized add) -- an expensive proposition when this all needs to be done via the stack. We probably want a more comprehensive fix here where we look back recursively through any shuffles to any build_vectors or scalar_to_vectors, etc. but that can come later. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205179 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 31 ++++++++++++++++++++++++------- test/CodeGen/PowerPC/vsx.ll | 16 ++++++++++++++++ 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a9983c7faa5..931bf51d8b9 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9745,9 +9745,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // We only perform this optimization before the op legalization phase because // we may introduce new vector instructions which are not backed by TD // patterns. For example on AVX, extracting elements from a wide vector - // without using extract_subvector. + // without using extract_subvector. However, if we can find an underlying + // scalar value, then we can always use that. if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE - && ConstEltNo && !LegalOperations) { + && ConstEltNo) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); int NumElem = VT.getVectorNumElements(); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec); @@ -9759,16 +9760,32 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { return DAG.getUNDEF(NVT); // Select the right vector half to extract from. 
+ SDValue SVInVec; if (OrigElt < NumElem) { - InVec = InVec->getOperand(0); + SVInVec = InVec->getOperand(0); } else { - InVec = InVec->getOperand(1); + SVInVec = InVec->getOperand(1); OrigElt -= NumElem; } - EVT IndexTy = TLI.getVectorIdxTy(); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, - InVec, DAG.getConstant(OrigElt, IndexTy)); + if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) { + SDValue InOp = SVInVec.getOperand(OrigElt); + if (InOp.getValueType() != NVT) { + assert(InOp.getValueType().isInteger() && NVT.isInteger()); + InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT); + } + + return InOp; + } + + // FIXME: We should handle recursing on other vector shuffles and + // scalar_to_vector here as well. + + if (!LegalOperations) { + EVT IndexTy = TLI.getVectorIdxTy(); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, + SVInVec, DAG.getConstant(OrigElt, IndexTy)); + } } // Perform only after legalization to ensure build_vector / vector_shuffle diff --git a/test/CodeGen/PowerPC/vsx.ll b/test/CodeGen/PowerPC/vsx.ll index d0ff0f2829e..a1710938ab5 100644 --- a/test/CodeGen/PowerPC/vsx.ll +++ b/test/CodeGen/PowerPC/vsx.ll @@ -624,3 +624,19 @@ define <2 x double> @test70(<2 x i8> %a) { ; CHECK: blr } +define <2 x i32> @test80(i32 %v) { + %b1 = insertelement <2 x i32> undef, i32 %v, i32 0 + %b2 = shufflevector <2 x i32> %b1, <2 x i32> undef, <2 x i32> zeroinitializer + %i = add <2 x i32> %b2, <i32 2, i32 3> + ret <2 x i32> %i + +; CHECK-LABEL: @test80 +; CHECK: addi +; CHECK: addi +; CHECK: lxvd2x +; CHECK-NOT: stxvd2x +; FIXME: We still make one vector for each vector element and then shuffle them +; together instead of just composing one vector on the stack. +; CHECK: blr +} + -- 2.11.0