From d5b92c389133c5d587e4094af553ec345ed40045 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sat, 5 Jan 2013 01:15:47 +0000 Subject: [PATCH] LoopVectorize: Non-commutative operators can be used as reduction variables as long as the reduction chain is used as the LHS operand. PR14803. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171583 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 17 ++++++++++---- test/Transforms/LoopVectorize/gcc-examples.ll | 2 +- test/Transforms/LoopVectorize/reduction.ll | 32 +++++++++++++++++++++++++-- 3 files changed, 44 insertions(+), 7 deletions(-) diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 5e2d7971f84..af2e8464d01 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1912,10 +1912,6 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, if (Iter->use_empty()) return false; - // Any reduction instr must be of one of the allowed kinds. - if (!isReductionInstr(Iter, Kind)) - return false; - // Did we find a user inside this loop already ? bool FoundInBlockUser = false; // Did we reach the initial PHI node already ? @@ -1953,6 +1949,16 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, if (FoundInBlockUser) return false; FoundInBlockUser = true; + + // Any reduction instr must be of one of the allowed kinds. + if (!isReductionInstr(U, Kind)) + return false; + + // Reductions of instructions such as Div and Sub are only + // possible if the LHS is the reduction variable. + if (!U->isCommutative() && U->getOperand(0) != Iter) + return false; + Iter = U; } @@ -1985,8 +1991,11 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I, case Instruction::PHI: // possibly. 
return true; + case Instruction::Sub: case Instruction::Add: return Kind == IntegerAdd; + case Instruction::SDiv: + case Instruction::UDiv: case Instruction::Mul: return Kind == IntegerMult; case Instruction::And: diff --git a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll index de3f59f41af..b8b125f3ee7 100644 --- a/test/Transforms/LoopVectorize/gcc-examples.ll +++ b/test/Transforms/LoopVectorize/gcc-examples.ll @@ -241,7 +241,7 @@ define void @example8(i32 %x) nounwind uwtable ssp { } ;CHECK: @example9 -;CHECK-NOT: phi <4 x i32> +;CHECK: phi <4 x i32> ;CHECK: ret i32 define i32 @example9() nounwind uwtable readonly ssp { br label %1 diff --git a/test/Transforms/LoopVectorize/reduction.ll b/test/Transforms/LoopVectorize/reduction.ll index fbce072b8b1..6b29e2537bd 100644 --- a/test/Transforms/LoopVectorize/reduction.ll +++ b/test/Transforms/LoopVectorize/reduction.ll @@ -296,11 +296,12 @@ for.end: ; preds = %for.body, %entry ret i32 %x.0.lcssa } -;CHECK: @reduction_sub_lhs +; In this code the reduction variable is on the RHS of the subtraction, so this is not a valid reduction. +;CHECK: @reduction_sub_rhs ;CHECK-NOT: phi <4 x i32> ;CHECK-NOT: sub nsw <4 x i32> ;CHECK: ret i32 -define i32 @reduction_sub_lhs(i32 %n, i32* noalias nocapture %A) nounwind uwtable readonly { +define i32 @reduction_sub_rhs(i32 %n, i32* noalias nocapture %A) nounwind uwtable readonly { entry: %cmp4 = icmp sgt i32 %n, 0 br i1 %cmp4, label %for.body, label %for.end @@ -320,3 +321,30 @@ for.end: ; preds = %for.body, %entry %x.0.lcssa = phi i32 [ 0, %entry ], [ %sub, %for.body ] ret i32 %x.0.lcssa } + + +; In this test the reduction variable is on the LHS and we can vectorize it. 
+;CHECK: @reduction_sub_lhs +;CHECK: phi <4 x i32> +;CHECK: sub nsw <4 x i32> +;CHECK: ret i32 +define i32 @reduction_sub_lhs(i32 %n, i32* noalias nocapture %A) nounwind uwtable readonly { +entry: + %cmp4 = icmp sgt i32 %n, 0 + br i1 %cmp4, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %sub = sub nsw i32 %x.05, %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %x.0.lcssa = phi i32 [ 0, %entry ], [ %sub, %for.body ] + ret i32 %x.0.lcssa +} -- 2.11.0