From 12ab473e2bc2bb16c5da409b58d0d323bc3805bc Mon Sep 17 00:00:00 2001 From: Guozhi Wei Date: Tue, 31 May 2016 20:41:19 +0000 Subject: [PATCH] [SLP] Pass in correct alignment when querying memory access cost This patch fixes bug https://llvm.org/bugs/show_bug.cgi?id=27897. When querying the memory access cost, SLP currently always passes in an alignment value of 1 (unaligned), so it gets a very high cost for scalar memory accesses and wrongly vectorizes the memory loads in the test case. It can be fixed by simply passing in the correct alignment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@271333 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 12 ++++++--- .../Transforms/SLPVectorizer/PowerPC/lit.local.cfg | 2 ++ test/Transforms/SLPVectorizer/PowerPC/pr27897.ll | 29 ++++++++++++++++++++++ 3 files changed, 39 insertions(+), 4 deletions(-) create mode 100644 test/Transforms/SLPVectorizer/PowerPC/lit.local.cfg create mode 100644 test/Transforms/SLPVectorizer/PowerPC/pr27897.ll diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 30da577d0bd..57824434b1b 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1726,16 +1726,20 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { } case Instruction::Load: { // Cost of wide load - cost of scalar loads. + unsigned alignment = dyn_cast<LoadInst>(VL0)->getAlignment(); int ScalarLdCost = VecTy->getNumElements() * - TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); - int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, 1, 0); + TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0); + int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, + VecTy, alignment, 0); return VecLdCost - ScalarLdCost; } case Instruction::Store: { // We know that we can merge the stores. Calculate the cost. 
+ unsigned alignment = dyn_cast<StoreInst>(VL0)->getAlignment(); int ScalarStCost = VecTy->getNumElements() * - TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0); - int VecStCost = TTI->getMemoryOpCost(Instruction::Store, VecTy, 1, 0); + TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0); + int VecStCost = TTI->getMemoryOpCost(Instruction::Store, + VecTy, alignment, 0); return VecStCost - ScalarStCost; } case Instruction::Call: { diff --git a/test/Transforms/SLPVectorizer/PowerPC/lit.local.cfg b/test/Transforms/SLPVectorizer/PowerPC/lit.local.cfg new file mode 100644 index 00000000000..091332439b1 --- /dev/null +++ b/test/Transforms/SLPVectorizer/PowerPC/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'PowerPC' in config.root.targets: + config.unsupported = True diff --git a/test/Transforms/SLPVectorizer/PowerPC/pr27897.ll b/test/Transforms/SLPVectorizer/PowerPC/pr27897.ll new file mode 100644 index 00000000000..dabb3380ef1 --- /dev/null +++ b/test/Transforms/SLPVectorizer/PowerPC/pr27897.ll @@ -0,0 +1,29 @@ +; RUN: opt -S -mtriple=powerpc64-linux-gnu -mcpu=pwr8 -mattr=+vsx -slp-vectorizer < %s | FileCheck %s + +%struct.A = type { i8*, i8* } + +define i64 @foo(%struct.A* nocapture readonly %this) { +entry: + %end.i = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 1 + %0 = bitcast i8** %end.i to i64* + %1 = load i64, i64* %0, align 8 + %2 = bitcast %struct.A* %this to i64* + %3 = load i64, i64* %2, align 8 + %sub.ptr.sub.i = sub i64 %1, %3 + %cmp = icmp sgt i64 %sub.ptr.sub.i, 9 + br i1 %cmp, label %return, label %lor.lhs.false + +lor.lhs.false: + %4 = inttoptr i64 %3 to i8* + %5 = inttoptr i64 %1 to i8* + %cmp2 = icmp ugt i8* %5, %4 + %. = select i1 %cmp2, i64 2, i64 -1 + ret i64 %. + +return: + ret i64 2 +} + +; CHECK: load i64 +; CHECK-NOT: load <2 x i64> +; CHECK-NOT: extractelement -- 2.11.0