From 2588aa94253d33f1d90279e4ef492fc021aa39a6 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Fri, 13 Nov 2015 21:51:02 +0000 Subject: [PATCH] [LIR] Add support for creating memcpys from loops with a negative stride. This allows us to transform the below loop into a memcpy. void test(unsigned *__restrict__ a, unsigned *__restrict__ b) { for (int i = 2047; i >= 0; --i) { a[i] = b[i]; } } This is the memcpy version of r251518, which added support for memset with negative strided loops. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253091 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 33 ++++++++++++++++++---------- test/Transforms/LoopIdiom/basic.ll | 31 ++++++++++++++++++++++++-- 2 files changed, 50 insertions(+), 14 deletions(-) diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index f933e82d928..c2fb8cd49b7 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -129,7 +129,7 @@ private: bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, const SCEVAddRecExpr *StoreEv, const SCEVAddRecExpr *LoadEv, - const SCEV *BECount); + const SCEV *BECount, bool NegStride); /// @} /// \name Noncountable Loop Idiom Handling @@ -362,10 +362,6 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { StoredVal, SI, StoreEv, BECount, NegStride)) return true; - // TODO: We don't handle negative stride memcpys. - if (NegStride) - return false; - // If the stored value is a strided load in the same loop with the same stride // this may be transformable into a memcpy. 
This kicks in for stuff like // for (i) A[i] = B[i]; @@ -374,7 +370,8 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0))); if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() && StoreEv->getOperand(1) == LoadEv->getOperand(1) && LI->isSimple()) - if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount)) + if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount, + NegStride)) return true; } // errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n"; @@ -626,7 +623,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( /// same-strided load. bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( StoreInst *SI, unsigned StoreSize, const SCEVAddRecExpr *StoreEv, - const SCEVAddRecExpr *LoadEv, const SCEV *BECount) { + const SCEVAddRecExpr *LoadEv, const SCEV *BECount, bool NegStride) { // If we're not allowed to form memcpy, we fail. if (!TLI->has(LibFunc::memcpy)) return false; @@ -640,6 +637,14 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( IRBuilder<> Builder(Preheader->getTerminator()); SCEVExpander Expander(*SE, *DL, "loop-idiom"); + const SCEV *StrStart = StoreEv->getStart(); + unsigned StrAS = SI->getPointerAddressSpace(); + Type *IntPtrTy = Builder.getIntPtrTy(*DL, StrAS); + + // Handle negative strided loops. + if (NegStride) + StrStart = getStartForNegStride(StrStart, BECount, IntPtrTy, StoreSize, SE); + // Okay, we have a strided store "p[i]" of a loaded value. We can turn // this into a memcpy in the loop preheader now if we want. However, this // would be unsafe to do if there is anything else in the loop that may read @@ -647,8 +652,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( // feeds the stores. Check for an alias by generating the base address and // checking everything.
Value *StoreBasePtr = Expander.expandCodeFor( - StoreEv->getStart(), Builder.getInt8PtrTy(SI->getPointerAddressSpace()), - Preheader->getTerminator()); + StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator()); if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount, StoreSize, *AA, SI)) { @@ -658,11 +662,17 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( return false; } + const SCEV *LdStart = LoadEv->getStart(); + unsigned LdAS = LI->getPointerAddressSpace(); + + // Handle negative strided loops. + if (NegStride) + LdStart = getStartForNegStride(LdStart, BECount, IntPtrTy, StoreSize, SE); + // For a memcpy, we have to make sure that the input array is not being // mutated by the loop. Value *LoadBasePtr = Expander.expandCodeFor( - LoadEv->getStart(), Builder.getInt8PtrTy(LI->getPointerAddressSpace()), - Preheader->getTerminator()); + LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator()); if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize, *AA, SI)) { @@ -677,7 +687,6 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. - Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace()); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy); const SCEV *NumBytesS = diff --git a/test/Transforms/LoopIdiom/basic.ll b/test/Transforms/LoopIdiom/basic.ll index 9743caee25d..27a955175b5 100644 --- a/test/Transforms/LoopIdiom/basic.ll +++ b/test/Transforms/LoopIdiom/basic.ll @@ -469,7 +469,7 @@ for.cond.cleanup: ; preds = %for.body ; CHECK: ret void } -; We don't handle memcpy-able loops with negative stride. +; Handle memcpy-able loops with negative stride. 
define noalias i32* @test17(i32* nocapture readonly %a, i32 %c) { entry: %conv = sext i32 %c to i64 @@ -499,8 +499,35 @@ while.end.loopexit: ; preds = %while.body while.end: ; preds = %while.end.loopexit, %entry ret i32* %0 ; CHECK-LABEL: @test17( -; CHECK-NOT: call void @llvm.memcpy +; CHECK: call void @llvm.memcpy ; CHECK: ret i32* } declare noalias i8* @malloc(i64) + +; Handle memcpy-able loops with negative stride. +; void test18(unsigned *__restrict__ a, unsigned *__restrict__ b) { +; for (int i = 2047; i >= 0; --i) { +; a[i] = b[i]; +; } +; } +define void @test18(i32* noalias nocapture %a, i32* noalias nocapture readonly %b) #0 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 2047, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + store i32 %0, i32* %arrayidx2, align 4 + %indvars.iv.next = add nsw i64 %indvars.iv, -1 + %cmp = icmp sgt i64 %indvars.iv, 0 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.body + ret void +; CHECK-LABEL: @test18( +; CHECK: call void @llvm.memcpy +; CHECK: ret +} -- 2.11.0