From 44388667e99de222d3943ef56d099aa470d30925 Mon Sep 17 00:00:00 2001 From: Guozhi Wei Date: Thu, 14 Dec 2017 19:35:43 +0000 Subject: [PATCH] [SLPVectorizer] Don't ignore scalar extraction instructions of aggregate value In SLPVectorizer, the vector build instructions (insertvalue for aggregate type) are passed to BoUpSLP.buildTree, where they are treated as UserIgnoreList, so later in cost estimation the cost of these instructions is not counted. For an aggregate value, later uses are more likely to be done in scalar registers, either as individual scalars or as a whole for a function call or return value. Ignoring the scalar extraction instructions may cause overly aggressive vectorization of aggregate values and slow down performance. So for vectorization of an aggregate value, the scalar extraction instructions are required in cost estimation. Differential Revision: https://reviews.llvm.org/D41139 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@320736 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Transforms/Vectorize/SLPVectorizer.h | 6 ++++-- lib/Transforms/Vectorize/SLPVectorizer.cpp | 10 +++++++--- test/Transforms/SLPVectorizer/PowerPC/aggregate.ll | 19 +++++++++++++++++++ test/Transforms/SLPVectorizer/X86/aggregate.ll | 19 +++++++++++++++++++ 4 files changed, 49 insertions(+), 5 deletions(-) create mode 100644 test/Transforms/SLPVectorizer/PowerPC/aggregate.ll create mode 100644 test/Transforms/SLPVectorizer/X86/aggregate.ll diff --git a/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/include/llvm/Transforms/Vectorize/SLPVectorizer.h index 06f6fa11a94..25f264c4722 100644 --- a/include/llvm/Transforms/Vectorize/SLPVectorizer.h +++ b/include/llvm/Transforms/Vectorize/SLPVectorizer.h @@ -96,11 +96,13 @@ private: /// \brief Try to vectorize a list of operands. /// \@param BuildVector A list of users to ignore for the purpose of - /// scheduling and that don't need extracting. + /// scheduling and cost estimation when NeedExtraction + /// is false. 
/// \returns true if a value was vectorized. bool tryToVectorizeList(ArrayRef VL, slpvectorizer::BoUpSLP &R, ArrayRef BuildVector = None, - bool AllowReorder = false); + bool AllowReorder = false, + bool NeedExtraction = false); /// \brief Try to vectorize a chain that may start at the operands of \p I. bool tryToVectorize(Instruction *I, slpvectorizer::BoUpSLP &R); diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9b35f35e870..76ba62f5d59 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -4533,7 +4533,8 @@ bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, ArrayRef BuildVector, - bool AllowReorder) { + bool AllowReorder, + bool NeedExtraction) { if (VL.size() < 2) return false; @@ -4627,11 +4628,12 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, << "\n"); ArrayRef Ops = VL.slice(I, OpsWidth); + ArrayRef EmptyArray; ArrayRef BuildVectorSlice; if (!BuildVector.empty()) BuildVectorSlice = BuildVector.slice(I, OpsWidth); - R.buildTree(Ops, BuildVectorSlice); + R.buildTree(Ops, NeedExtraction ? EmptyArray : BuildVectorSlice); // TODO: check if we can allow reordering for more cases. if (AllowReorder && R.shouldReorder()) { // Conceptually, there is nothing actually preventing us from trying to @@ -5821,7 +5823,9 @@ bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI, return false; DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n"); - return tryToVectorizeList(BuildVectorOpds, R, BuildVector, false); + // Aggregate value is unlikely to be processed in vector register, we need to + // extract scalars into scalar registers, so NeedExtraction is set true. 
+ return tryToVectorizeList(BuildVectorOpds, R, BuildVector, false, true); } bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI, diff --git a/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll b/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll new file mode 100644 index 00000000000..015c1f1ed8f --- /dev/null +++ b/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll @@ -0,0 +1,19 @@ +; RUN: opt -S -mtriple=powerpc64-linux-gnu -mcpu=pwr9 -mattr=+vsx -slp-vectorizer < %s | FileCheck %s + +%struct.S = type { i8*, i8* } + +@kS0 = common global %struct.S zeroinitializer, align 8 + +define { i64, i64 } @getS() { +entry: + %0 = load i64, i64* bitcast (%struct.S* @kS0 to i64*), align 8 + %1 = load i64, i64* bitcast (i8** getelementptr inbounds (%struct.S, %struct.S* @kS0, i64 0, i32 1) to i64*), align 8 + %2 = insertvalue { i64, i64 } undef, i64 %0, 0 + %3 = insertvalue { i64, i64 } %2, i64 %1, 1 + ret { i64, i64 } %3 +} + +; CHECK: load i64 +; CHECK-NOT: load <2 x i64> +; CHECK-NOT: extractelement + diff --git a/test/Transforms/SLPVectorizer/X86/aggregate.ll b/test/Transforms/SLPVectorizer/X86/aggregate.ll new file mode 100644 index 00000000000..350929dc539 --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/aggregate.ll @@ -0,0 +1,19 @@ +; RUN: opt -S -mtriple=x86_64-unknown-linux -mcpu=corei7 -slp-vectorizer < %s | FileCheck %s + +%struct.S = type { i8*, i8* } + +@kS0 = common global %struct.S zeroinitializer, align 8 + +define { i64, i64 } @getS() { +entry: + %0 = load i64, i64* bitcast (%struct.S* @kS0 to i64*), align 8 + %1 = load i64, i64* bitcast (i8** getelementptr inbounds (%struct.S, %struct.S* @kS0, i64 0, i32 1) to i64*), align 8 + %2 = insertvalue { i64, i64 } undef, i64 %0, 0 + %3 = insertvalue { i64, i64 } %2, i64 %1, 1 + ret { i64, i64 } %3 +} + +; CHECK: load i64 +; CHECK-NOT: load <2 x i64> +; CHECK-NOT: extractelement + -- 2.11.0