From a116623e06d0cf833cbefd4921dbca8581f1c806 Mon Sep 17 00:00:00 2001 From: Pete Cooper Date: Tue, 28 Feb 2012 04:27:10 +0000 Subject: [PATCH] DSE: Shorten memset when a later store overwrites the start of it git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@151620 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/DeadStoreElimination.cpp | 68 ++++++++++++++++++++- .../DeadStoreElimination/OverwriteStoreStart.ll | 71 ++++++++++++++++++++++ 2 files changed, 137 insertions(+), 2 deletions(-) create mode 100644 test/Transforms/DeadStoreElimination/OverwriteStoreStart.ll diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index c8c53606015..9f6a9533adf 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -259,6 +259,13 @@ static bool isShortenable(Instruction *I) { } } + +/// isMemset - Returns true if this instruction is an intrinsic memset +static bool isMemset(Instruction *I) { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); + return II && II->getIntrinsicID() == Intrinsic::memset; +} + /// getStoredPointerOperand - Return the pointer that is being written to. static Value *getStoredPointerOperand(Instruction *I) { if (StoreInst *SI = dyn_cast<StoreInst>(I)) @@ -310,14 +317,17 @@ namespace { { OverwriteComplete, OverwriteEnd, + OverwriteStart, OverwriteUnknown }; } /// isOverwrite - Return 'OverwriteComplete' if a store to the 'Later' location /// completely overwrites a store to the 'Earlier' location. 
-/// 'OverwriteEnd' if the end of the 'Earlier' location is completely -/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined +/// 'OverwriteEnd' if the end of the 'Earlier' location is completely +/// overwritten by 'Later', 'OverwriteStart' if the start of 'Earlier' +/// is completely overwritten by 'Later' or 'OverwriteUnknown' if nothing +/// can be determined static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, const AliasAnalysis::Location &Earlier, AliasAnalysis &AA, @@ -418,6 +428,21 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, LaterOff < int64_t(EarlierOff + Earlier.Size) && int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size)) return OverwriteEnd; + + // The other interesting case is if the later store overwrites the start of + // the earlier store + // + // |--earlier--| + // |-- later --| + // + // In this case we may want to trim the size of earlier to avoid generating + // writes to addresses which will definitely be overwritten later + if (EarlierOff >= LaterOff && + EarlierOff < int64_t(LaterOff + Later.Size) && + int64_t(EarlierOff + Earlier.Size) >= int64_t(LaterOff + Later.Size)) { + LaterOff = LaterOff + Later.Size; + return OverwriteStart; + } // Otherwise, they don't completely overlap. return OverwriteUnknown; @@ -589,6 +614,45 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { DepIntrinsic->setLength(TrimmedLength); MadeChange = true; } + } else if (OR == OverwriteStart && isMemset(DepWrite)) { + // TODO: base this on the target vector size so that if the earlier + // store was too small to get vector writes anyway then it's likely + // a good idea to shorten it + // Power of 2 vector writes are probably always a bad idea to optimize + // as any store/memset/memcpy is likely using vector instructions so + // shortening it to not vector size is likely to be slower + // TODO: shorten memcpy and memmove by offsetting the source address. 
+ MemIntrinsic* DepIntrinsic = cast<MemIntrinsic>(DepWrite); + unsigned DepWriteAlign = DepIntrinsic->getAlignment(); + if (llvm::isPowerOf2_64(InstWriteOffset) || + ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) { + + DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW START: " + << *DepWrite << "\n KILLER (offset " + << InstWriteOffset << ", " + << DepWriteOffset << ", " + << DepLoc.Size << ")" + << *Inst << '\n'); + + Value* DepWriteLength = DepIntrinsic->getLength(); + Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(), + DepLoc.Size - + (InstWriteOffset - + DepWriteOffset)); + DepIntrinsic->setLength(TrimmedLength); + const TargetData *TD = AA->getTargetData(); + Type *IntPtrTy = TD->getIntPtrType(BB.getContext()); + Value* Offset = ConstantInt::get(IntPtrTy, + InstWriteOffset - DepWriteOffset); + // Offset the start of the memset with a GEP. As the memset type is + // i8* a GEP will do this without needing to use ptrtoint, etc. + Value *Dest = GetElementPtrInst::Create(DepIntrinsic->getRawDest(), + Offset, + "", + DepWrite); + DepIntrinsic->setDest(Dest); + MadeChange = true; + } } } diff --git a/test/Transforms/DeadStoreElimination/OverwriteStoreStart.ll b/test/Transforms/DeadStoreElimination/OverwriteStoreStart.ll new file mode 100644 index 00000000000..56ba3f49270 --- /dev/null +++ b/test/Transforms/DeadStoreElimination/OverwriteStoreStart.ll @@ -0,0 +1,71 @@ +; RUN: opt < %s -basicaa -dse -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +%struct.vec2 = type { <4 x i32>, <4 x i32> } +%struct.vec2plusi = type { <4 x i32>, <4 x i32>, i32 } + +@glob1 = global %struct.vec2 zeroinitializer, align 16 +@glob2 = global %struct.vec2plusi zeroinitializer, align 16 + +define void @write4to8(i32* nocapture %p) nounwind uwtable ssp { +; CHECK: @write4to8 +entry: + %arrayidx0 = getelementptr inbounds i32* %p, i64 1 + %p3 
= bitcast i32* %arrayidx0 to i8* +; CHECK: call void @llvm.memset.p0i8.i64(i8* %{{[0-9]+}}, i8 0, i64 24, i32 4, i1 false) + call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false) + %arrayidx1 = getelementptr inbounds i32* %p, i64 1 + store i32 1, i32* %arrayidx1, align 4 + ret void +} + +define void @write4to12(i32* nocapture %p) nounwind uwtable ssp { +; CHECK: @write4to12 +entry: +%arrayidx0 = getelementptr inbounds i32* %p, i64 1 +%p3 = bitcast i32* %arrayidx0 to i8* +; CHECK: call void @llvm.memset.p0i8.i64(i8* %{{[0-9]+}}, i8 0, i64 20, i32 4, i1 false) +call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false) +%arrayidx1 = bitcast i32* %arrayidx0 to i64* +store i64 1, i64* %arrayidx1, align 4 +ret void +} + +define void @write4to8_2(i32* nocapture %p) nounwind uwtable ssp { +; CHECK: @write4to8_2 +entry: +%arrayidx0 = getelementptr inbounds i32* %p, i64 1 +%p3 = bitcast i32* %arrayidx0 to i8* +; CHECK: call void @llvm.memset.p0i8.i64(i8* %{{[0-9]+}}, i8 0, i64 24, i32 4, i1 false) +call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false) +%arrayidx1 = bitcast i32* %p to i64* +store i64 1, i64* %arrayidx1, align 4 +ret void +} + +define void @dontwrite4to6(i32* nocapture %p) nounwind uwtable ssp { +; CHECK: @dontwrite4to6 +entry: +%arrayidx0 = getelementptr inbounds i32* %p, i64 1 +%p3 = bitcast i32* %arrayidx0 to i8* +; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false) +call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false) +%arrayidx1 = bitcast i32* %arrayidx0 to i16* +store i16 1, i16* %arrayidx1, align 4 +ret void +} + +define void @write4to8_neg_gep(i32* nocapture %p) nounwind uwtable ssp { +; CHECK: @write4to8_neg_gep +entry: +%arrayidx0 = getelementptr inbounds i32* %p, i64 -1 +%p3 = bitcast i32* %arrayidx0 to i8* +; CHECK: call void @llvm.memset.p0i8.i64(i8* %{{[0-9]+}}, i8 0, i64 24, i32 4, i1 false) +call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 
4, i1 false) +%neg2 = getelementptr inbounds i32* %p, i64 -2 +%arrayidx1 = bitcast i32* %neg2 to i64* +store i64 1, i64* %arrayidx1, align 4 +ret void +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind -- 2.11.0