From: Clement Courbet Date: Wed, 15 May 2019 14:21:59 +0000 (+0000) Subject: Revert r360771 "[MergeICmps] Simplify the code." X-Git-Tag: android-x86-9.0-r1~3396 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=1e3d019b42b75147cd239ba2f434ec3c28822436;p=android-x86%2Fexternal-llvm.git Revert r360771 "[MergeICmps] Simplify the code." Breaks a bunch of buildbots. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@360776 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Scalar/MergeICmps.cpp b/lib/Transforms/Scalar/MergeICmps.cpp index 82d186250df..9a57ed6c6dc 100644 --- a/lib/Transforms/Scalar/MergeICmps.cpp +++ b/lib/Transforms/Scalar/MergeICmps.cpp @@ -48,7 +48,6 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/Pass.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include #include @@ -407,6 +406,13 @@ class BCECmpChain { First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset; } + // Merges the given comparison blocks into one memcmp block and update + // branches. Comparisons are assumed to be continguous. If NextBBInChain is + // null, the merged block will link to the phi block. + void mergeComparisons(ArrayRef Comparisons, + BasicBlock *const NextBBInChain, PHINode &Phi, + const TargetLibraryInfo *const TLI, AliasAnalysis *AA); + PHINode &Phi_; std::vector Comparisons_; // The original entry block (before sorting); @@ -446,7 +452,7 @@ BCECmpChain::BCECmpChain(const std::vector &Blocks, PHINode &Phi, // chain before sorting. Unless we can abort the chain at this point // and start anew. // - // NOTE: we only handle blocks a with single predecessor for now. + // NOTE: we only handle block with single predecessor for now.
if (Comparison.canSplit(AA)) { LLVM_DEBUG(dbgs() << "Split initial block '" << Comparison.BB->getName() @@ -534,173 +540,162 @@ void BCECmpChain::dump() const { } #endif // MERGEICMPS_DOT_ON -namespace { +bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI, + AliasAnalysis *AA) { + // First pass to check if there is at least one merge. If not, we don't do + // anything and we keep analysis passes intact. + { + bool AtLeastOneMerged = false; + for (size_t I = 1; I < Comparisons_.size(); ++I) { + if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) { + AtLeastOneMerged = true; + break; + } + } + if (!AtLeastOneMerged) return false; + } -// A class to compute the name of a set of merged basic blocks. -// This is optimized for the common case of no block names. -class MergedBlockName { - // Storage for the uncommon case of several named blocks. - SmallString<16> Scratch; + // Remove phi references to comparison blocks, they will be rebuilt as we + // merge the blocks. + for (const auto &Comparison : Comparisons_) { + Phi_.removeIncomingValue(Comparison.BB, false); + } -public: - explicit MergedBlockName(ArrayRef Comparisons) - : Name(makeName(Comparisons)) {} - const StringRef Name; + // If entry block is part of the chain, we need to make the first block + // of the chain the new entry block of the function. + BasicBlock *Entry = &Comparisons_[0].BB->getParent()->getEntryBlock(); + for (size_t I = 1; I < Comparisons_.size(); ++I) { + if (Entry == Comparisons_[I].BB) { + BasicBlock *NEntryBB = BasicBlock::Create(Entry->getContext(), "", + Entry->getParent(), Entry); + BranchInst::Create(Entry, NEntryBB); + break; + } + } -private: - StringRef makeName(ArrayRef Comparisons) { - assert(!Comparisons.empty() && "no basic block"); - // Fast path: only one block, or no names at all. 
- if (Comparisons.size() == 1) - return Comparisons[0].BB->getName(); - const int size = std::accumulate(Comparisons.begin(), Comparisons.end(), 0, - [](int i, const BCECmpBlock &Cmp) { - return i + Cmp.BB->getName().size(); - }); - if (size == 0) - return StringRef("", 0); - - // Slow path: at least two blocks, at least one block with a name. - Scratch.clear(); - // We'll have `size` bytes for name and `Comparisons.size() - 1` bytes for - // separators. - Scratch.reserve(size + Comparisons.size() - 1); - const auto append = [this](StringRef str) { - Scratch.append(str.begin(), str.end()); - }; - append(Comparisons[0].BB->getName()); - for (int I = 1, E = Comparisons.size(); I < E; ++I) { - const BasicBlock *const BB = Comparisons[I].BB; - if (!BB->getName().empty()) { - append("+"); - append(BB->getName()); - } + // Point the predecessors of the chain to the first comparison block (which is + // the new entry point) and update the entry block of the chain. + if (EntryBlock_ != Comparisons_[0].BB) { + EntryBlock_->replaceAllUsesWith(Comparisons_[0].BB); + EntryBlock_ = Comparisons_[0].BB; + } + + // Effectively merge blocks. + int NumMerged = 1; + for (size_t I = 1; I < Comparisons_.size(); ++I) { + if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) { + ++NumMerged; + } else { + // Merge all previous comparisons and start a new merge block. + mergeComparisons( + makeArrayRef(Comparisons_).slice(I - NumMerged, NumMerged), + Comparisons_[I].BB, Phi_, TLI, AA); + NumMerged = 1; } - return StringRef(Scratch); } -}; -} // namespace - -// Merges the given contiguous comparison blocks into one memcmp block. 
-static BasicBlock *mergeComparisons(ArrayRef Comparisons, - BasicBlock *const NextCmpBlock, - PHINode &Phi, - const TargetLibraryInfo *const TLI, - AliasAnalysis *AA) { - assert(!Comparisons.empty() && "merging zero comparisons"); - LLVMContext &Context = NextCmpBlock->getContext(); - const BCECmpBlock &FirstCmp = Comparisons[0]; - - // Create a new cmp block before next cmp block. - BasicBlock *const BB = - BasicBlock::Create(Context, MergedBlockName(Comparisons).Name, - NextCmpBlock->getParent(), NextCmpBlock); - IRBuilder<> Builder(BB); - // Add the GEPs from the first BCECmpBlock. - Value *const Lhs = Builder.Insert(FirstCmp.Lhs().GEP->clone()); - Value *const Rhs = Builder.Insert(FirstCmp.Rhs().GEP->clone()); - - Value *IsEqual = nullptr; - if (Comparisons.size() == 1) { - LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n"); - Value *const LhsLoad = - Builder.CreateLoad(FirstCmp.Lhs().LoadI->getType(), Lhs); - Value *const RhsLoad = - Builder.CreateLoad(FirstCmp.Rhs().LoadI->getType(), Rhs); - // There are no blocks to merge, just do the comparison. - IsEqual = Builder.CreateICmpEQ(LhsLoad, RhsLoad); - } else { - LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons\n"); + mergeComparisons(makeArrayRef(Comparisons_) + .slice(Comparisons_.size() - NumMerged, NumMerged), + nullptr, Phi_, TLI, AA); + return true; +} + +void BCECmpChain::mergeComparisons(ArrayRef Comparisons, + BasicBlock *const NextBBInChain, + PHINode &Phi, + const TargetLibraryInfo *const TLI, + AliasAnalysis *AA) { + assert(!Comparisons.empty()); + const auto &FirstComparison = *Comparisons.begin(); + BasicBlock *const BB = FirstComparison.BB; + LLVMContext &Context = BB->getContext(); + + if (Comparisons.size() >= 2) { // If there is one block that requires splitting, we do it now, i.e. // just before we know we will collapse the chain. The instructions // can be executed before any of the instructions in the chain. 
- const auto ToSplit = - std::find_if(Comparisons.begin(), Comparisons.end(), - [](const BCECmpBlock &B) { return B.RequireSplit; }); - if (ToSplit != Comparisons.end()) { - LLVM_DEBUG(dbgs() << "Splitting non_BCE work to header\n"); - ToSplit->split(BB, AA); - } + auto C = std::find_if(Comparisons.begin(), Comparisons.end(), + [](const BCECmpBlock &B) { return B.RequireSplit; }); + if (C != Comparisons.end()) + C->split(EntryBlock_, AA); - const unsigned TotalSizeBits = std::accumulate( - Comparisons.begin(), Comparisons.end(), 0u, - [](int Size, const BCECmpBlock &C) { return Size + C.SizeBits(); }); - - // Create memcmp() == 0. + LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons\n"); + const auto TotalSize = + std::accumulate(Comparisons.begin(), Comparisons.end(), 0, + [](int Size, const BCECmpBlock &C) { + return Size + C.SizeBits(); + }) / + 8; + + // Incoming edges do not need to be updated, and both GEPs are already + // computing the right address, we just need to: + // - replace the two loads and the icmp with the memcmp + // - update the branch + // - update the incoming values in the phi. + FirstComparison.BranchI->eraseFromParent(); + FirstComparison.CmpI->eraseFromParent(); + FirstComparison.Lhs().LoadI->eraseFromParent(); + FirstComparison.Rhs().LoadI->eraseFromParent(); + + IRBuilder<> Builder(BB); const auto &DL = Phi.getModule()->getDataLayout(); Value *const MemCmpCall = emitMemCmp( - Lhs, Rhs, - ConstantInt::get(DL.getIntPtrType(Context), TotalSizeBits / 8), Builder, - DL, TLI); - IsEqual = Builder.CreateICmpEQ( + FirstComparison.Lhs().GEP, FirstComparison.Rhs().GEP, + ConstantInt::get(DL.getIntPtrType(Context), TotalSize), + Builder, DL, TLI); + Value *const MemCmpIsZero = Builder.CreateICmpEQ( MemCmpCall, ConstantInt::get(Type::getInt32Ty(Context), 0)); - } - - BasicBlock *const PhiBB = Phi.getParent(); - // Add a branch to the next basic block in the chain. 
- if (NextCmpBlock == PhiBB) { - // Continue to phi, passing it the comparison result. - Builder.CreateBr(Phi.getParent()); - Phi.addIncoming(IsEqual, BB); - } else { - // Continue to next block if equal, exit to phi else. - Builder.CreateCondBr(IsEqual, NextCmpBlock, PhiBB); - Phi.addIncoming(ConstantInt::getFalse(Context), BB); - } - return BB; -} -bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI, - AliasAnalysis *AA) { - assert(Comparisons_.size() >= 2 && "simplifying trivial BCECmpChain"); - // First pass to check if there is at least one merge. If not, we don't do - // anything and we keep analysis passes intact. - const auto AtLeastOneMerged = [this]() { - for (size_t I = 1; I < Comparisons_.size(); ++I) { - if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) - return true; + // Add a branch to the next basic block in the chain. + if (NextBBInChain) { + Builder.CreateCondBr(MemCmpIsZero, NextBBInChain, Phi.getParent()); + Phi.addIncoming(ConstantInt::getFalse(Context), BB); + } else { + Builder.CreateBr(Phi.getParent()); + Phi.addIncoming(MemCmpIsZero, BB); } - return false; - }; - if (!AtLeastOneMerged()) - return false; - // Effectively merge blocks. We go in the reverse direction from the phi block - // so that the next block is always available to branch to. - const auto mergeRange = [this, TLI, AA](int I, int Num, BasicBlock *Next) { - return mergeComparisons(makeArrayRef(Comparisons_).slice(I, Num), Next, - Phi_, TLI, AA); - }; - int NumMerged = 1; - BasicBlock *NextCmpBlock = Phi_.getParent(); - for (int I = static_cast(Comparisons_.size()) - 2; I >= 0; --I) { - if (IsContiguous(Comparisons_[I], Comparisons_[I + 1])) { - ++NumMerged; + // Delete merged blocks. + for (size_t I = 1; I < Comparisons.size(); ++I) { + BasicBlock *CBB = Comparisons[I].BB; + CBB->replaceAllUsesWith(BB); + CBB->eraseFromParent(); + } + } else { + assert(Comparisons.size() == 1); + // There are no blocks to merge, but we still need to update the branches. 
+ LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n"); + if (NextBBInChain) { + if (FirstComparison.BranchI->isConditional()) { + LLVM_DEBUG(dbgs() << "conditional -> conditional\n"); + // Just update the "true" target, the "false" target should already be + // the phi block. + assert(FirstComparison.BranchI->getSuccessor(1) == Phi.getParent()); + FirstComparison.BranchI->setSuccessor(0, NextBBInChain); + Phi.addIncoming(ConstantInt::getFalse(Context), BB); + } else { + LLVM_DEBUG(dbgs() << "unconditional -> conditional\n"); + // Replace the unconditional branch by a conditional one. + FirstComparison.BranchI->eraseFromParent(); + IRBuilder<> Builder(BB); + Builder.CreateCondBr(FirstComparison.CmpI, NextBBInChain, + Phi.getParent()); + Phi.addIncoming(FirstComparison.CmpI, BB); + } } else { - NextCmpBlock = mergeRange(I + 1, NumMerged, NextCmpBlock); - NumMerged = 1; + if (FirstComparison.BranchI->isConditional()) { + LLVM_DEBUG(dbgs() << "conditional -> unconditional\n"); + // Replace the conditional branch by an unconditional one. + FirstComparison.BranchI->eraseFromParent(); + IRBuilder<> Builder(BB); + Builder.CreateBr(Phi.getParent()); + Phi.addIncoming(FirstComparison.CmpI, BB); + } else { + LLVM_DEBUG(dbgs() << "unconditional -> unconditional\n"); + Phi.addIncoming(FirstComparison.CmpI, BB); + } } } - NextCmpBlock = mergeRange(0, NumMerged, NextCmpBlock); - - // Replace the original cmp chain with the new cmp chain by pointing all - // predecessors of EntryBlock_ to NextCmpBlock instead. This makes all cmp - // blocks in the old chain unreachable. - for (BasicBlock *Pred : predecessors(EntryBlock_)) { - Pred->getTerminator()->replaceUsesOfWith(EntryBlock_, NextCmpBlock); - } - EntryBlock_ = nullptr; - - // Delete merged blocks. This also removes incoming values in phi. 
- SmallVector DeadBlocks; - for (auto &Cmp : Comparisons_) { - DeadBlocks.push_back(Cmp.BB); - } - DeleteDeadBlocks(DeadBlocks); - - Comparisons_.clear(); - return true; } std::vector getOrderedBlocks(PHINode &Phi, diff --git a/test/CodeGen/PowerPC/memcmp-mergeexpand.ll b/test/CodeGen/PowerPC/memcmp-mergeexpand.ll index 298ce90b74e..c1e81074c86 100644 --- a/test/CodeGen/PowerPC/memcmp-mergeexpand.ll +++ b/test/CodeGen/PowerPC/memcmp-mergeexpand.ll @@ -7,7 +7,7 @@ define zeroext i1 @opeq1( ; PPC64LE-LABEL: opeq1: -; PPC64LE: # %bb.0: # %"entry+land.rhs.i" +; PPC64LE: # %bb.0: # %entry ; PPC64LE-NEXT: ld 3, 0(3) ; PPC64LE-NEXT: ld 4, 0(4) ; PPC64LE-NEXT: xor 3, 3, 4 diff --git a/test/CodeGen/X86/memcmp-mergeexpand.ll b/test/CodeGen/X86/memcmp-mergeexpand.ll index 0be463daaeb..785ba403465 100644 --- a/test/CodeGen/X86/memcmp-mergeexpand.ll +++ b/test/CodeGen/X86/memcmp-mergeexpand.ll @@ -8,7 +8,7 @@ define zeroext i1 @opeq1( ; X86-LABEL: opeq1: -; X86: # %bb.0: # %"entry+land.rhs.i" +; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl (%ecx), %edx @@ -20,7 +20,7 @@ define zeroext i1 @opeq1( ; X86-NEXT: retl ; ; X64-LABEL: opeq1: -; X64: # %bb.0: # %"entry+land.rhs.i" +; X64: # %bb.0: # %entry ; X64-NEXT: movq (%rdi), %rax ; X64-NEXT: cmpq (%rsi), %rax ; X64-NEXT: sete %al diff --git a/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll b/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll index 00c70fba9c9..fa4af66e639 100644 --- a/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll +++ b/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll @@ -5,18 +5,19 @@ define zeroext i1 @opeq1( ; X86-LABEL: @opeq1( -; X86-NEXT: "entry+land.rhs.i+land.rhs.i.2+land.rhs.i.3": +; X86-NEXT: entry: ; X86-NEXT: [[PTR:%.*]] = alloca i32 ; X86-NEXT: store i32 42, i32* [[PTR]] -; X86-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0 -; X86-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[S]], %S* [[B:%.*]], i64 0, i32 0 -; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[TMP0]] to i8* -; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[TMP1]] to i8* +; X86-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0 +; X86-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0 +; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[FIRST_I]] to i8* +; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[FIRST1_I]] to i8* ; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 16) -; X86-NEXT: [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0 +; X86-NEXT: [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0 ; X86-NEXT: br label [[OPEQ1_EXIT:%.*]] ; X86: opeq1.exit: -; X86-NEXT: ret i1 [[TMP2]] +; X86-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[ENTRY:%.*]] ] +; X86-NEXT: ret i1 [[TMP1]] ; %S* nocapture readonly dereferenceable(16) %a, %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 { diff --git a/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll b/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll index 2123b7969c3..f416fa451eb 100644 --- a/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll +++ b/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll @@ -3,37 +3,37 @@ %S = type { i32, i32, i32, i32 } -; The entry block is part of the chain. It however can not be merged. We need to -; make sure that the control flow is still consistent (goes through each of the -; blocks). +; The entry block is part of the chain. It however can not be merged. We need to make the +; first comparison block in the chain the new entry block of the function. 
define zeroext i1 @opeq1( ; CHECK-LABEL: @opeq1( -; CHECK-NEXT: "land.rhs.i+land.rhs.i.2": -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0 -; CHECK-NEXT: [[CSTR:%.*]] = bitcast i32* [[TMP0]] to i8* -; CHECK-NEXT: [[CSTR3:%.*]] = bitcast i32* [[TMP1]] to i8* -; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR3]], i64 8) -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0 -; CHECK-NEXT: br i1 [[TMP2]], label [[ENTRY2:%.*]], label [[OPEQ1_EXIT:%.*]] -; CHECK: entry2: -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] -; CHECK-NEXT: br i1 [[TMP7]], label [[LAND_RHS_I_31:%.*]], label [[OPEQ1_EXIT]] -; CHECK: land.rhs.i.31: -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 3 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 3 -; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP10]], [[TMP11]] +; CHECK-NEXT: br label [[LAND_RHS_I:%.*]] +; CHECK: entry: +; CHECK-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 3 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[FIRST_I]], align 4 +; CHECK-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[FIRST1_I]], align 4 +; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I_3:%.*]], label [[OPEQ1_EXIT:%.*]] +; CHECK: land.rhs.i: +; CHECK-NEXT: [[SECOND_I:%.*]] = 
getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 0 +; CHECK-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 0 +; CHECK-NEXT: [[CSTR:%.*]] = bitcast i32* [[SECOND_I]] to i8* +; CHECK-NEXT: [[CSTR1:%.*]] = bitcast i32* [[SECOND2_I]] to i8* +; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8) +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[MEMCMP]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[ENTRY:%.*]], label [[OPEQ1_EXIT]] +; CHECK: land.rhs.i.3: +; CHECK-NEXT: [[FOURTH_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[FOURTH_I]], align 4 +; CHECK-NEXT: [[FOURTH2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[FOURTH2_I]], align 4 +; CHECK-NEXT: [[CMP5_I:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]] ; CHECK-NEXT: br label [[OPEQ1_EXIT]] ; CHECK: opeq1.exit: -; CHECK-NEXT: [[TMP13:%.*]] = phi i1 [ [[TMP12]], [[LAND_RHS_I_31]] ], [ false, [[ENTRY2]] ], [ false, %"land.rhs.i+land.rhs.i.2" ] -; CHECK-NEXT: ret i1 [[TMP13]] +; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ false, [[LAND_RHS_I]] ], [ false, [[ENTRY]] ], [ [[CMP5_I]], [[LAND_RHS_I_3]] ] +; CHECK-NEXT: ret i1 [[TMP6]] ; %S* nocapture readonly dereferenceable(16) %a, %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 { diff --git a/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll b/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll index 0a75d3bdd01..790c0e9854d 100644 --- a/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll +++ b/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll @@ -23,18 +23,18 @@ define zeroext i1 @opeq1( ; X86-NEXT: [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4 ; X86-NEXT: call void (...) 
@foo() ; X86-NEXT: [[CMP2_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]] -; X86-NEXT: br i1 [[CMP2_I]], label %"land.rhs.i.2+land.rhs.i.3", label [[OPEQ1_EXIT]] -; X86: "land.rhs.i.2+land.rhs.i.3": -; X86-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 2 -; X86-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 2 -; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[TMP4]] to i8* -; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[TMP5]] to i8* +; X86-NEXT: br i1 [[CMP2_I]], label [[LAND_RHS_I_2:%.*]], label [[OPEQ1_EXIT]] +; X86: land.rhs.i.2: +; X86-NEXT: [[THIRD_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 2 +; X86-NEXT: [[THIRD2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 2 +; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[THIRD_I]] to i8* +; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[THIRD2_I]] to i8* ; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8) -; X86-NEXT: [[TMP6:%.*]] = icmp eq i32 [[MEMCMP]], 0 +; X86-NEXT: [[TMP4:%.*]] = icmp eq i32 [[MEMCMP]], 0 ; X86-NEXT: br label [[OPEQ1_EXIT]] ; X86: opeq1.exit: -; X86-NEXT: [[TMP7:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ false, [[LAND_RHS_I]] ], [ [[TMP6]], %"land.rhs.i.2+land.rhs.i.3" ] -; X86-NEXT: ret i1 [[TMP7]] +; X86-NEXT: [[TMP5:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ false, [[LAND_RHS_I]] ], [ [[TMP4]], [[LAND_RHS_I_2]] ] +; X86-NEXT: ret i1 [[TMP5]] ; %S* nocapture readonly dereferenceable(16) %a, %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 { diff --git a/test/Transforms/MergeICmps/X86/pair-int32-int32.ll b/test/Transforms/MergeICmps/X86/pair-int32-int32.ll index 0a6a681e9d9..13f2f4874d8 100644 --- a/test/Transforms/MergeICmps/X86/pair-int32-int32.ll +++ b/test/Transforms/MergeICmps/X86/pair-int32-int32.ll @@ -6,16 +6,17 @@ define zeroext i1 @opeq1( ; X86-LABEL: @opeq1( -; X86-NEXT: "entry+land.rhs.i": -; X86-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0 -; 
X86-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0 -; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[TMP0]] to i8* -; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[TMP1]] to i8* +; X86-NEXT: entry: +; X86-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0 +; X86-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0 +; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[FIRST_I]] to i8* +; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[FIRST1_I]] to i8* ; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8) -; X86-NEXT: [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0 +; X86-NEXT: [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0 ; X86-NEXT: br label [[OPEQ1_EXIT:%.*]] ; X86: opeq1.exit: -; X86-NEXT: ret i1 [[TMP2]] +; X86-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[ENTRY:%.*]] ] +; X86-NEXT: ret i1 [[TMP1]] ; ; X86-NOBUILTIN-LABEL: @opeq1( ; X86-NOBUILTIN-NEXT: entry: @@ -66,15 +67,17 @@ opeq1.exit: ; Same as above, but the two blocks are in inverse order. 
define zeroext i1 @opeq1_inverse( ; X86-LABEL: @opeq1_inverse( -; X86-NEXT: "land.rhs.i+entry": -; X86-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0 -; X86-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0 -; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[TMP0]] to i8* -; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[TMP1]] to i8* +; X86-NEXT: br label [[LAND_RHS_I:%.*]] +; X86: land.rhs.i: +; X86-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0 +; X86-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0 +; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[SECOND_I]] to i8* +; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[SECOND2_I]] to i8* ; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8) -; X86-NEXT: [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0 +; X86-NEXT: [[TMP1:%.*]] = icmp eq i32 [[MEMCMP]], 0 ; X86-NEXT: br label [[OPEQ1_EXIT:%.*]] ; X86: opeq1.exit: +; X86-NEXT: [[TMP2:%.*]] = phi i1 [ [[TMP1]], [[LAND_RHS_I]] ] ; X86-NEXT: ret i1 [[TMP2]] ; ; X86-NOBUILTIN-LABEL: @opeq1_inverse( diff --git a/test/Transforms/MergeICmps/X86/split-block-does-work.ll b/test/Transforms/MergeICmps/X86/split-block-does-work.ll index 63283edd0ca..91ef9b1622c 100644 --- a/test/Transforms/MergeICmps/X86/split-block-does-work.ll +++ b/test/Transforms/MergeICmps/X86/split-block-does-work.ll @@ -8,17 +8,18 @@ declare void @foo(...) nounwind readnone ; We can split %entry and create a memcmp(16 bytes). define zeroext i1 @opeq1( ; X86-LABEL: @opeq1( -; X86-NEXT: "entry+land.rhs.i+land.rhs.i.2+land.rhs.i.3": +; X86-NEXT: entry: ; X86-NEXT: call void (...) 
@foo() -; X86-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0 -; X86-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0 -; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[TMP0]] to i8* -; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[TMP1]] to i8* +; X86-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0 +; X86-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0 +; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[FIRST_I]] to i8* +; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[FIRST1_I]] to i8* ; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 16) -; X86-NEXT: [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0 +; X86-NEXT: [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0 ; X86-NEXT: br label [[OPEQ1_EXIT:%.*]] ; X86: opeq1.exit: -; X86-NEXT: ret i1 [[TMP2]] +; X86-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[ENTRY:%.*]] ] +; X86-NEXT: ret i1 [[TMP1]] ; ; Make sure this call is moved to the beginning of the entry block. %S* nocapture readonly dereferenceable(16) %a,