[SLPVectorizer] Don't ignore scalar extraction instructions of aggregate value

author Guozhi Wei <carrot@google.com>

Thu, 14 Dec 2017 19:35:43 +0000 (19:35 +0000)

committer Guozhi Wei <carrot@google.com>

Thu, 14 Dec 2017 19:35:43 +0000 (19:35 +0000)
author Guozhi Wei <carrot@google.com>
Thu, 14 Dec 2017 19:35:43 +0000 (19:35 +0000)
committer Guozhi Wei <carrot@google.com>
Thu, 14 Dec 2017 19:35:43 +0000 (19:35 +0000)
diff --git a/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/include/llvm/Transforms/Vectorize/SLPVectorizer.h

index 06f6fa1..25f264c 100644 (file)
--- a/include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ b/include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -96,11 +96,13 @@ private:
  
    /// \brief Try to vectorize a list of operands.
    /// \@param BuildVector A list of users to ignore for the purpose of
-  ///                     scheduling and that don't need extracting.
+  ///                     scheduling and cost estimation when NeedExtraction
+  ///                     is false.
    /// \returns true if a value was vectorized.
    bool tryToVectorizeList(ArrayRef<Value *> VL, slpvectorizer::BoUpSLP &R,
                            ArrayRef<Value *> BuildVector = None,
-                          bool AllowReorder = false);
+                          bool AllowReorder = false,
+                          bool NeedExtraction = false);
  
    /// \brief Try to vectorize a chain that may start at the operands of \p I.
    bool tryToVectorize(Instruction *I, slpvectorizer::BoUpSLP &R);
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp

index 9b35f35..76ba62f 100644 (file)
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4533,7 +4533,8 @@ bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
  
  bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
                                             ArrayRef<Value *> BuildVector,
-                                           bool AllowReorder) {
+                                           bool AllowReorder,
+                                           bool NeedExtraction) {
    if (VL.size() < 2)
      return false;
  
@@ -4627,11 +4628,12 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
                     << "\n");
        ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
  
+      ArrayRef<Value *> EmptyArray;
        ArrayRef<Value *> BuildVectorSlice;
        if (!BuildVector.empty())
          BuildVectorSlice = BuildVector.slice(I, OpsWidth);
  
-      R.buildTree(Ops, BuildVectorSlice);
+      R.buildTree(Ops, NeedExtraction ? EmptyArray : BuildVectorSlice);
        // TODO: check if we can allow reordering for more cases.
        if (AllowReorder && R.shouldReorder()) {
          // Conceptually, there is nothing actually preventing us from trying to
@@ -5821,7 +5823,9 @@ bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
      return false;
  
    DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
-  return tryToVectorizeList(BuildVectorOpds, R, BuildVector, false);
+  // An aggregate value is unlikely to be processed in a vector register; we
+  // need to extract scalars into scalar registers, so NeedExtraction is true.
+  return tryToVectorizeList(BuildVectorOpds, R, BuildVector, false, true);
  }
  
  bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
diff --git a/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll b/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll

new file mode 100644 (file)

index 0000000..015c1f1
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll
@@ -0,0 +1,19 @@
+; RUN: opt -S -mtriple=powerpc64-linux-gnu -mcpu=pwr9 -mattr=+vsx -slp-vectorizer < %s | FileCheck %s
+
+%struct.S = type { i8*, i8* }
+
+@kS0 = common global %struct.S zeroinitializer, align 8
+
+define { i64, i64 } @getS() {
+entry:
+  %0 = load i64, i64* bitcast (%struct.S* @kS0 to i64*), align 8
+  %1 = load i64, i64* bitcast (i8** getelementptr inbounds (%struct.S, %struct.S* @kS0, i64 0, i32 1) to i64*), align 8
+  %2 = insertvalue { i64, i64 } undef, i64 %0, 0
+  %3 = insertvalue { i64, i64 } %2, i64 %1, 1
+  ret { i64, i64 } %3
+}
+
+; CHECK: load i64
+; CHECK-NOT: load <2 x i64>
+; CHECK-NOT: extractelement
+
diff --git a/test/Transforms/SLPVectorizer/X86/aggregate.ll b/test/Transforms/SLPVectorizer/X86/aggregate.ll

new file mode 100644 (file)

index 0000000..350929d
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/aggregate.ll
@@ -0,0 +1,19 @@
+; RUN: opt -S -mtriple=x86_64-unknown-linux -mcpu=corei7 -slp-vectorizer < %s | FileCheck %s
+
+%struct.S = type { i8*, i8* }
+
+@kS0 = common global %struct.S zeroinitializer, align 8
+
+define { i64, i64 } @getS() {
+entry:
+  %0 = load i64, i64* bitcast (%struct.S* @kS0 to i64*), align 8
+  %1 = load i64, i64* bitcast (i8** getelementptr inbounds (%struct.S, %struct.S* @kS0, i64 0, i32 1) to i64*), align 8
+  %2 = insertvalue { i64, i64 } undef, i64 %0, 0
+  %3 = insertvalue { i64, i64 } %2, i64 %1, 1
+  ret { i64, i64 } %3
+}
+
+; CHECK: load i64
+; CHECK-NOT: load <2 x i64>
+; CHECK-NOT: extractelement
+
author	Guozhi Wei <carrot@google.com>
	Thu, 14 Dec 2017 19:35:43 +0000 (19:35 +0000)
committer	Guozhi Wei <carrot@google.com>
	Thu, 14 Dec 2017 19:35:43 +0000 (19:35 +0000)
include/llvm/Transforms/Vectorize/SLPVectorizer.h		patch \| blob \| history
lib/Transforms/Vectorize/SLPVectorizer.cpp		patch \| blob \| history
test/Transforms/SLPVectorizer/PowerPC/aggregate.ll	[new file with mode: 0644]	patch \| blob
test/Transforms/SLPVectorizer/X86/aggregate.ll	[new file with mode: 0644]	patch \| blob