From: Daniel Neilson Date: Wed, 21 Mar 2018 14:14:55 +0000 (+0000) Subject: [MemCpyOpt] Update to new API for memory intrinsic alignment X-Git-Tag: android-x86-7.1-r4~3471 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=8bf245f39a7fdc7858b113309b59ea62024b0d74;p=android-x86%2Fexternal-llvm.git [MemCpyOpt] Update to new API for memory intrinsic alignment Summary: This change is part of step five in the series of changes to remove alignment argument from memcpy/memmove/memset in favour of alignment attributes. In particular, this changes the MemCpyOpt pass to cease using: 1) The old getAlignment() API of MemoryIntrinsic in favour of getting source & dest specific alignments through the new API. 2) The old IRBuilder CreateMemCpy/CreateMemMove single-alignment APIs in favour of the new API that allows setting source and destination alignments independently. We also add a few tests to fill gaps in the testing of this pass. Steps: Step 1) Remove alignment parameter and create alignment parameter attributes for memcpy/memmove/memset. ( rL322965, rC322964, rL322963 ) Step 2) Expand the IRBuilder API to allow creation of memcpy/memmove with differing source and dest alignments. ( rL323597 ) Step 3) Update Clang to use the new IRBuilder API. ( rC323617 ) Step 4) Update Polly to use the new IRBuilder API. ( rL323618 ) Step 5) Update LLVM passes that create memcpy/memmove calls to use the new IRBuilder API, and those that use MemIntrinsicInst::[get|set]Alignment() to use [get|set]DestAlignment() and [get|set]SourceAlignment() instead. ( rL323886, rL323891, rL324148, rL324273, rL324278, rL324384, rL324395, rL324402, rL324626, rL324642, rL324653, rL324654, rL324773, rL324774, rL324781, rL324784, rL324955, rL324960, rL325816, rL327398, rL327421 ) Step 6) Remove the single-alignment IRBuilder API for memcpy/memmove, and the MemIntrinsicInst::[get|set]Alignment() methods. 
Reference http://lists.llvm.org/pipermail/llvm-dev/2015-August/089384.html http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@328097 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 9c870b42a74..e2de0ee6bcf 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -263,7 +263,7 @@ public: void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) { int64_t Size = cast(MSI->getLength())->getZExtValue(); - addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI); + addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlignment(), MSI); } void addRange(int64_t Start, int64_t Size, Value *Ptr, @@ -498,16 +498,25 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, return AMemSet; } -static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI, - const LoadInst *LI) { +static unsigned findStoreAlignment(const DataLayout &DL, const StoreInst *SI) { unsigned StoreAlign = SI->getAlignment(); if (!StoreAlign) StoreAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType()); + return StoreAlign; +} + +static unsigned findLoadAlignment(const DataLayout &DL, const LoadInst *LI) { unsigned LoadAlign = LI->getAlignment(); if (!LoadAlign) LoadAlign = DL.getABITypeAlignment(LI->getType()); + return LoadAlign; +} - return std::min(StoreAlign, LoadAlign); +static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI, + const LoadInst *LI) { + unsigned StoreAlign = findStoreAlignment(DL, SI); + unsigned LoadAlign = findLoadAlignment(DL, LI); + return MinAlign(StoreAlign, LoadAlign); } // This method try to lift a store instruction before position P. 
@@ -656,19 +665,20 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (!AA.isNoAlias(MemoryLocation::get(SI), LoadLoc)) UseMemMove = true; - unsigned Align = findCommonAlignment(DL, SI, LI); uint64_t Size = DL.getTypeStoreSize(T); IRBuilder<> Builder(P); Instruction *M; if (UseMemMove) - M = Builder.CreateMemMove(SI->getPointerOperand(), - LI->getPointerOperand(), Size, - Align, SI->isVolatile()); + M = Builder.CreateMemMove( + SI->getPointerOperand(), findStoreAlignment(DL, SI), + LI->getPointerOperand(), findLoadAlignment(DL, LI), Size, + SI->isVolatile()); else - M = Builder.CreateMemCpy(SI->getPointerOperand(), - LI->getPointerOperand(), Size, - Align, SI->isVolatile()); + M = Builder.CreateMemCpy( + SI->getPointerOperand(), findStoreAlignment(DL, SI), + LI->getPointerOperand(), findLoadAlignment(DL, LI), Size, + SI->isVolatile()); DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M << "\n"); @@ -1047,20 +1057,17 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, // If all checks passed, then we can transform M. - // Make sure to use the lesser of the alignment of the source and the dest - // since we're changing where we're reading from, but don't want to increase - // the alignment past what can be read from or written to. // TODO: Is this worth it if we're creating a less aligned memcpy? For // example we could be moving from movaps -> movq on x86. 
- unsigned Align = std::min(MDep->getAlignment(), M->getAlignment()); - IRBuilder<> Builder(M); if (UseMemMove) - Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(), - Align, M->isVolatile()); + Builder.CreateMemMove(M->getRawDest(), M->getDestAlignment(), + MDep->getRawSource(), MDep->getSourceAlignment(), + M->getLength(), M->isVolatile()); else - Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(), - Align, M->isVolatile()); + Builder.CreateMemCpy(M->getRawDest(), M->getDestAlignment(), + MDep->getRawSource(), MDep->getSourceAlignment(), + M->getLength(), M->isVolatile()); // Remove the instruction we're replacing. MD->removeInstruction(M); @@ -1106,7 +1113,7 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, // If Dest is aligned, and SrcSize is constant, use the minimum alignment // of the sum. const unsigned DestAlign = - std::max(MemSet->getAlignment(), MemCpy->getAlignment()); + std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment()); if (DestAlign > 1) if (ConstantInt *SrcSizeC = dyn_cast(SrcSize)) Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign); @@ -1166,7 +1173,7 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, IRBuilder<> Builder(MemCpy); Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), - CopySize, MemCpy->getAlignment()); + CopySize, MemCpy->getDestAlignment()); return true; } @@ -1192,7 +1199,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) { if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) { IRBuilder<> Builder(M); Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(), - M->getAlignment(), false); + M->getDestAlignment(), false); MD->removeInstruction(M); M->eraseFromParent(); ++NumCpyToSet; @@ -1221,8 +1228,11 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) { // d) memcpy from a just-memset'd source can be turned into memset. 
if (DepInfo.isClobber()) { if (CallInst *C = dyn_cast(DepInfo.getInst())) { + // FIXME: Can we pass in either of dest/src alignment here instead + // of conservatively taking the minimum? + unsigned Align = MinAlign(M->getDestAlignment(), M->getSourceAlignment()); if (performCallSlotOptzn(M, M->getDest(), M->getSource(), - CopySize->getZExtValue(), M->getAlignment(), + CopySize->getZExtValue(), Align, C)) { MD->removeInstruction(M); M->eraseFromParent(); @@ -1337,7 +1347,7 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) { // source of the memcpy to the alignment we need. If we fail, we bail out. AssumptionCache &AC = LookupAssumptionCache(); DominatorTree &DT = LookupDomTree(); - if (MDep->getAlignment() < ByValAlign && + if (MDep->getSourceAlignment() < ByValAlign && getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, CS.getInstruction(), &AC, &DT) < ByValAlign) return false; diff --git a/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll b/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll index f6b175e4171..9dbba093fe2 100644 --- a/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll +++ b/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll @@ -3,6 +3,34 @@ %T = type { i8, i32 } +; Ensure load-store forwarding of an aggregate is interpreted as +; a memmove when the source and dest may alias +define void @test_memmove(%T* align 8 %a, %T* align 16 %b) { +; CHECK-LABEL: @test_memmove( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %T* [[B:%.*]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %T* [[A:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 16 [[TMP1]], i8* align 8 [[TMP2]], i64 8, i1 false) +; CHECK-NEXT: ret void +; + %val = load %T, %T* %a, align 8 + store %T %val, %T* %b, align 16 + ret void +} + +; Ensure load-store forwarding of an aggregate is interpreted as +; a memcpy when the source and dest do not alias +define void @test_memcpy(%T* noalias align 8 %a, %T* noalias align 16 %b) { +; CHECK-LABEL: 
@test_memcpy( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %T* [[B:%.*]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %T* [[A:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP1]], i8* align 8 [[TMP2]], i64 8, i1 false) +; CHECK-NEXT: ret void +; + %val = load %T, %T* %a, align 8 + store %T %val, %T* %b, align 16 + ret void +} + ; memcpy(%d, %a) should not be generated since store2 may-aliases load %a. define void @f(%T* %a, %T* %b, %T* %c, %T* %d) { ; CHECK-LABEL: @f( diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll index 7e1e4d54a24..4c5f6cbeb1a 100644 --- a/test/Transforms/MemCpyOpt/memcpy.ll +++ b/test/Transforms/MemCpyOpt/memcpy.ll @@ -46,6 +46,20 @@ define void @test2(i8* %P, i8* %Q) nounwind { ; CHECK-NEXT: ret void } +; The intermediate alloca and one of the memcpy's should be eliminated, the +; other should be related with a memcpy. +define void @test2_memcpy(i8* noalias %P, i8* noalias %Q) nounwind { + %memtmp = alloca %0, align 16 + %R = bitcast %0* %memtmp to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %R, i8* align 16 %P, i32 32, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %Q, i8* align 16 %R, i32 32, i1 false) + ret void + +; CHECK-LABEL: @test2_memcpy( +; CHECK-NEXT: call void @llvm.memcpy{{.*}}(i8* align 16 %Q, i8* align 16 %P +; CHECK-NEXT: ret void +} +