From 65fafbb1090201a727513fe9af50e443cfa900da Mon Sep 17 00:00:00 2001
From: Hal Finkel <hfinkel@anl.gov>
Date: Mon, 31 Mar 2014 11:43:19 +0000
Subject: [PATCH] Look at shuffles of build_vectors in
 DAGCombiner::visitEXTRACT_VECTOR_ELT

When the loop vectorizer vectorizes code that uses the loop induction variable,
we often end up with IR like this:

  %b1 = insertelement <2 x i32> undef, i32 %v, i32 0
  %b2 = shufflevector <2 x i32> %b1, <2 x i32> undef, <2 x i32> zeroinitializer
  %i = add <2 x i32> %b2, <i32 2, i32 3>

If the add in this example is not legal (as is the case on PPC with VSX), it
will be scalarized, and we'll end up with a number of extract_vector_elt nodes
with the vector shuffle as the input operand, and that vector shuffle is fed by
one or more build_vector nodes. By the time that vector operations are
expanded, visitEXTRACT_VECTOR_ELT will not create new extract_vector_elt by
looking through the vector shuffle (to make sure that no illegal operations are
created), and so the extract_vector_elt -> vector shuffle -> build_vector is
never simplified to an operand of the build vector.

By looking at build_vectors through a shuffle we fix this particular situation,
preventing a vector from being built, only to be deconstructed again (for the
scalarized add) -- an expensive proposition when this all needs to be done via
the stack. We probably want a more comprehensive fix here where we look back
recursively through any shuffles to any build_vectors or scalar_to_vectors,
etc. but that can come later.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205179 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 31 ++++++++++++++++++++++++-------
 test/CodeGen/PowerPC/vsx.ll              | 16 ++++++++++++++++
 2 files changed, 40 insertions(+), 7 deletions(-)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a9983c7faa5..931bf51d8b9 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9745,9 +9745,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
   // We only perform this optimization before the op legalization phase because
   // we may introduce new vector instructions which are not backed by TD
   // patterns. For example on AVX, extracting elements from a wide vector
-  // without using extract_subvector.
+  // without using extract_subvector. However, if we can find an underlying
+  // scalar value, then we can always use that.
   if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
-      && ConstEltNo && !LegalOperations) {
+      && ConstEltNo) {
     int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
     int NumElem = VT.getVectorNumElements();
     ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
@@ -9759,16 +9760,32 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
       return DAG.getUNDEF(NVT);
 
     // Select the right vector half to extract from.
+    SDValue SVInVec;
     if (OrigElt < NumElem) {
-      InVec = InVec->getOperand(0);
+      SVInVec = InVec->getOperand(0);
     } else {
-      InVec = InVec->getOperand(1);
+      SVInVec = InVec->getOperand(1);
       OrigElt -= NumElem;
     }
 
-    EVT IndexTy = TLI.getVectorIdxTy();
-    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT,
-                       InVec, DAG.getConstant(OrigElt, IndexTy));
+    if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
+      SDValue InOp = SVInVec.getOperand(OrigElt);
+      if (InOp.getValueType() != NVT) {
+        assert(InOp.getValueType().isInteger() && NVT.isInteger());
+        InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
+      }
+
+      return InOp;
+    }
+
+    // FIXME: We should handle recursing on other vector shuffles and
+    // scalar_to_vector here as well.
+
+    if (!LegalOperations) {
+      EVT IndexTy = TLI.getVectorIdxTy();
+      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT,
+                         SVInVec, DAG.getConstant(OrigElt, IndexTy));
+    }
   }
 
   // Perform only after legalization to ensure build_vector / vector_shuffle
diff --git a/test/CodeGen/PowerPC/vsx.ll b/test/CodeGen/PowerPC/vsx.ll
index d0ff0f2829e..a1710938ab5 100644
--- a/test/CodeGen/PowerPC/vsx.ll
+++ b/test/CodeGen/PowerPC/vsx.ll
@@ -624,3 +624,19 @@ define <2 x double> @test70(<2 x i8> %a) {
 ; CHECK: blr
 }
 
+define <2 x i32> @test80(i32 %v) {
+  %b1 = insertelement <2 x i32> undef, i32 %v, i32 0
+  %b2 = shufflevector <2 x i32> %b1, <2 x i32> undef, <2 x i32> zeroinitializer
+  %i = add <2 x i32> %b2, <i32 2, i32 3>
+  ret <2 x i32> %i
+
+; CHECK-LABEL: @test80
+; CHECK: addi
+; CHECK: addi
+; CHECK: lxvd2x
+; CHECK-NOT: stxvd2x
+; FIXME: We still make one vector for each vector element and this shuffle them
+; together instead of just composing one vector on the stack.
+; CHECK: blr
+}
+
-- 
2.11.0