From 15f8060f6f2388d503fcd400e1226c69554c1924 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Tue, 1 Dec 2020 11:07:28 +0300 Subject: [PATCH] [SimplifyCFG] FoldBranchToCommonDest: don't require that cmp of br is last instruction There is no correctness need for that, and since we allow live-out uses, this could theoretically happen, because currently nothing will move the cond to right before the branch in those tests. But regardless, lifting that restriction even makes the transform easier to understand. This makes the transform happen in 81 more cases (+0.55%) ) --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 59 ++++++++-------------- .../SimplifyCFG/fold-branch-to-common-dest.ll | 50 +++++++++--------- 2 files changed, 47 insertions(+), 62 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 35bef13ffaa..3083b692930 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2766,16 +2766,6 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, Cond->getParent() != BB || !Cond->hasOneUse()) return Changed; - // Make sure the instruction after the condition is the cond branch. - BasicBlock::iterator CondIt = ++Cond->getIterator(); - - // Ignore dbg intrinsics. - while (isa(CondIt)) - ++CondIt; - - if (&*CondIt != BI) - return Changed; - // Only allow this transformation if computing the condition doesn't involve // too many instructions and these involved instructions can be executed // unconditionally. We denote all involved instructions except the condition @@ -2783,12 +2773,15 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, // number of the bonus instructions we'll need to create when cloning into // each predecessor does not exceed a certain threshold. unsigned NumBonusInsts = 0; - for (auto I = BB->begin(); Cond != &*I; ++I) { - // Ignore dbg intrinsics. - if (isa(I)) + for (Instruction &I : *BB) { + // Don't check the branch condition comparison itself. + if (&I == Cond) + continue; + // Ignore dbg intrinsics, and the terminator. + if (isa(I) || isa(I)) continue; // I must be safe to execute unconditionally. - if (!isSafeToSpeculativelyExecute(&*I)) + if (!isSafeToSpeculativelyExecute(&I)) return Changed; // Account for the cost of duplicating this instruction into each @@ -2896,13 +2889,15 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, // Note that there may be multiple predecessor blocks, so we cannot move // bonus instructions to a predecessor block. ValueToValueMapTy VMap; // maps original values to cloned values - // We already make sure Cond is the last instruction before BI. Therefore, - // all instructions before Cond other than DbgInfoIntrinsic are bonus - // instructions. - for (auto BonusInst = BB->begin(); Cond != &*BonusInst; ++BonusInst) { - if (isa(BonusInst)) + Instruction *CondInPred; + for (Instruction &BonusInst : *BB) { + if (isa(BonusInst) || isa(BonusInst)) continue; - Instruction *NewBonusInst = BonusInst->clone(); + + Instruction *NewBonusInst = BonusInst.clone(); + + if (&BonusInst == Cond) + CondInPred = NewBonusInst; // When we fold the bonus instructions we want to make sure we // reset their debug locations in order to avoid stepping on dead @@ -2911,7 +2906,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, RemapInstruction(NewBonusInst, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); - VMap[&*BonusInst] = NewBonusInst; + VMap[&BonusInst] = NewBonusInst; // If we moved a load, we cannot any longer claim any knowledge about // its potential value. The previous information might have been valid @@ -2921,9 +2916,9 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, NewBonusInst->dropUnknownNonDebugMetadata(); PredBlock->getInstList().insert(PBI->getIterator(), NewBonusInst); - NewBonusInst->takeName(&*BonusInst); - BonusInst->setName(BonusInst->getName() + ".old"); - BonusInst->replaceUsesWithIf(NewBonusInst, [BB](Use &U) { + NewBonusInst->takeName(&BonusInst); + BonusInst.setName(BonusInst.getName() + ".old"); + BonusInst.replaceUsesWithIf(NewBonusInst, [BB](Use &U) { auto *User = cast(U.getUser()); // Ignore uses in the same block as the bonus instruction itself. if (User->getParent() == BB) @@ -2937,20 +2932,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, }); } - // Clone Cond into the predecessor basic block, and or/and the - // two conditions together. - Instruction *CondInPred = Cond->clone(); - - // Reset the condition debug location to avoid jumping on dead code - // as the result of folding dead branches. - CondInPred->setDebugLoc(DebugLoc()); - - RemapInstruction(CondInPred, VMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); - PredBlock->getInstList().insert(PBI->getIterator(), CondInPred); - CondInPred->takeName(Cond); - Cond->setName(CondInPred->getName() + ".old"); - + // Now that the Cond was cloned into the predecessor basic block, + // or/and the two conditions together. if (BI->isConditional()) { Instruction *NewCond = cast( Builder.CreateBinOp(Opc, PBI->getCondition(), CondInPred, "or.cond")); diff --git a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll index 3d3c3bbf630..c2a4bc00d41 100644 --- a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll +++ b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll @@ -43,9 +43,9 @@ define void @two_preds(i8 %v0, i8 %v1, i8 %v2, i8 %v3) { ; CHECK-NEXT: br i1 [[C0]], label [[PRED0:%.*]], label [[PRED1:%.*]] ; CHECK: pred0: ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[V1:%.*]], 0 -; CHECK-NEXT: [[C3_OLD:%.*]] = icmp eq i8 [[V3:%.*]], 0 -; CHECK-NEXT: [[OR_COND1:%.*]] = or i1 [[C1]], [[C3_OLD]] -; CHECK-NEXT: br i1 [[OR_COND1]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT:%.*]] +; CHECK-NEXT: [[DOTOLD:%.*]] = icmp eq i8 [[V3:%.*]], 0 +; CHECK-NEXT: [[OR_COND2:%.*]] = or i1 [[C1]], [[DOTOLD]] +; CHECK-NEXT: br i1 [[OR_COND2]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT:%.*]] ; CHECK: pred1: ; CHECK-NEXT: [[C2:%.*]] = icmp eq i8 [[V2:%.*]], 0 ; CHECK-NEXT: [[C3:%.*]] = icmp eq i8 [[V3]], 0 @@ -118,9 +118,9 @@ define void @two_preds_with_extra_op(i8 %v0, i8 %v1, i8 %v2, i8 %v3) { ; CHECK: pred0: ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[V1:%.*]], 0 ; CHECK-NEXT: [[DOTOLD:%.*]] = add i8 [[V1]], [[V2:%.*]] -; CHECK-NEXT: [[C3_OLD:%.*]] = icmp eq i8 [[DOTOLD]], 0 -; CHECK-NEXT: [[OR_COND2:%.*]] = or i1 [[C1]], [[C3_OLD]] -; CHECK-NEXT: br i1 [[OR_COND2]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT:%.*]] +; CHECK-NEXT: [[DOTOLD1:%.*]] = icmp eq i8 [[DOTOLD]], 0 +; CHECK-NEXT: [[OR_COND4:%.*]] = or i1 [[C1]], [[DOTOLD1]] +; CHECK-NEXT: br i1 [[OR_COND4]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT:%.*]] ; CHECK: pred1: ; CHECK-NEXT: [[C2:%.*]] = icmp eq i8 [[V2]], 0 ; CHECK-NEXT: [[V3_ADJ:%.*]] = add i8 [[V1]], [[V2]] @@ -198,9 +198,9 @@ define void @two_preds_with_extra_op_multiuse(i8 %v0, i8 %v1, i8 %v2, i8 %v3) { ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[V1:%.*]], 0 ; CHECK-NEXT: [[DOTOLD:%.*]] = add i8 [[V1]], [[V2:%.*]] ; CHECK-NEXT: [[DOTOLD1:%.*]] = add i8 [[DOTOLD]], [[DOTOLD]] -; CHECK-NEXT: [[C3_OLD:%.*]] = icmp eq i8 [[DOTOLD1]], 0 -; CHECK-NEXT: [[OR_COND4:%.*]] = or i1 [[C1]], [[C3_OLD]] -; CHECK-NEXT: br i1 [[OR_COND4]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT:%.*]] +; CHECK-NEXT: [[DOTOLD2:%.*]] = icmp eq i8 [[DOTOLD1]], 0 +; CHECK-NEXT: [[OR_COND6:%.*]] = or i1 [[C1]], [[DOTOLD2]] +; CHECK-NEXT: br i1 [[OR_COND6]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT:%.*]] ; CHECK: pred1: ; CHECK-NEXT: [[C2:%.*]] = icmp eq i8 [[V2]], 0 ; CHECK-NEXT: [[V3_ADJ:%.*]] = add i8 [[V1]], [[V2]] @@ -369,8 +369,8 @@ define void @two_preds_with_extra_op_liveout(i8 %v0, i8 %v1, i8 %v2, i8 %v3) { ; CHECK-NEXT: br i1 [[OR_COND]], label [[FINAL_LEFT]], label [[FINAL_RIGHT:%.*]] ; CHECK: dispatch: ; CHECK-NEXT: [[DOTOLD:%.*]] = add i8 [[V1]], [[V2]] -; CHECK-NEXT: [[C3_OLD:%.*]] = icmp eq i8 [[DOTOLD]], 0 -; CHECK-NEXT: br i1 [[C3_OLD]], label [[FINAL_LEFT]], label [[FINAL_RIGHT]] +; CHECK-NEXT: [[DOTOLD1:%.*]] = icmp eq i8 [[DOTOLD]], 0 +; CHECK-NEXT: br i1 [[DOTOLD1]], label [[FINAL_LEFT]], label [[FINAL_RIGHT]] ; CHECK: final_left: ; CHECK-NEXT: [[MERGE_LEFT:%.*]] = phi i8 [ [[DOTOLD]], [[DISPATCH]] ], [ 0, [[PRED0]] ], [ [[V3_ADJ]], [[PRED1]] ] ; CHECK-NEXT: call void @use8(i8 [[MERGE_LEFT]]) @@ -419,8 +419,8 @@ define void @two_preds_with_extra_op_liveout_multiuse(i8 %v0, i8 %v1, i8 %v2, i8 ; CHECK-NEXT: br i1 [[OR_COND]], label [[FINAL_LEFT]], label [[FINAL_RIGHT:%.*]] ; CHECK: dispatch: ; CHECK-NEXT: [[DOTOLD:%.*]] = add i8 [[V1]], [[V2]] -; CHECK-NEXT: [[C3_OLD:%.*]] = icmp eq i8 [[DOTOLD]], 0 -; CHECK-NEXT: br i1 [[C3_OLD]], label [[FINAL_LEFT]], label [[FINAL_RIGHT]] +; CHECK-NEXT: [[DOTOLD1:%.*]] = icmp eq i8 [[DOTOLD]], 0 +; CHECK-NEXT: br i1 [[DOTOLD1]], label [[FINAL_LEFT]], label [[FINAL_RIGHT]] ; CHECK: final_left: ; CHECK-NEXT: [[MERGE_LEFT:%.*]] = phi i8 [ [[DOTOLD]], [[DISPATCH]] ], [ 0, [[PRED0]] ], [ [[V3_ADJ]], [[PRED1]] ] ; CHECK-NEXT: [[MERGE_LEFT_2:%.*]] = phi i8 [ [[DOTOLD]], [[DISPATCH]] ], [ 42, [[PRED0]] ], [ [[V3_ADJ]], [[PRED1]] ] @@ -541,8 +541,8 @@ define void @two_preds_with_extra_op_externally_used_only(i8 %v0, i8 %v1, i8 %v2 ; CHECK-NEXT: br i1 [[OR_COND]], label [[FINAL_LEFT]], label [[FINAL_RIGHT:%.*]] ; CHECK: dispatch: ; CHECK-NEXT: [[DOTOLD:%.*]] = add i8 [[V1]], [[V2]] -; CHECK-NEXT: [[C3_OLD:%.*]] = icmp eq i8 [[V3]], 0 -; CHECK-NEXT: br i1 [[C3_OLD]], label [[FINAL_LEFT]], label [[FINAL_RIGHT]] +; CHECK-NEXT: [[DOTOLD1:%.*]] = icmp eq i8 [[V3]], 0 +; CHECK-NEXT: br i1 [[DOTOLD1]], label [[FINAL_LEFT]], label [[FINAL_RIGHT]] ; CHECK: final_left: ; CHECK-NEXT: [[MERGE_LEFT:%.*]] = phi i8 [ [[DOTOLD]], [[DISPATCH]] ], [ 0, [[PRED0]] ], [ [[V3_ADJ]], [[PRED1]] ] ; CHECK-NEXT: call void @use8(i8 [[MERGE_LEFT]]) @@ -591,8 +591,8 @@ define void @two_preds_with_extra_op_externally_used_only_multiuse(i8 %v0, i8 %v ; CHECK-NEXT: br i1 [[OR_COND]], label [[FINAL_LEFT]], label [[FINAL_RIGHT:%.*]] ; CHECK: dispatch: ; CHECK-NEXT: [[DOTOLD:%.*]] = add i8 [[V1]], [[V2]] -; CHECK-NEXT: [[C3_OLD:%.*]] = icmp eq i8 [[V3]], 0 -; CHECK-NEXT: br i1 [[C3_OLD]], label [[FINAL_LEFT]], label [[FINAL_RIGHT]] +; CHECK-NEXT: [[DOTOLD1:%.*]] = icmp eq i8 [[V3]], 0 +; CHECK-NEXT: br i1 [[DOTOLD1]], label [[FINAL_LEFT]], label [[FINAL_RIGHT]] ; CHECK: final_left: ; CHECK-NEXT: [[MERGE_LEFT:%.*]] = phi i8 [ [[DOTOLD]], [[DISPATCH]] ], [ 0, [[PRED0]] ], [ [[V3_ADJ]], [[PRED1]] ] ; CHECK-NEXT: [[MERGE_LEFT_2:%.*]] = phi i8 [ [[DOTOLD]], [[DISPATCH]] ], [ 42, [[PRED0]] ], [ [[V3_ADJ]], [[PRED1]] ] @@ -637,11 +637,10 @@ define void @one_pred_with_extra_op_externally_used_only_after_cond_distant_phi( ; CHECK-NEXT: br i1 [[C0]], label [[PRED:%.*]], label [[LEFT_END:%.*]] ; CHECK: pred: ; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[V1:%.*]], 0 -; CHECK-NEXT: br i1 [[C1]], label [[DISPATCH:%.*]], label [[FINAL_RIGHT:%.*]] -; CHECK: dispatch: ; CHECK-NEXT: [[C3:%.*]] = icmp eq i8 [[V3:%.*]], 0 ; CHECK-NEXT: [[V2_ADJ:%.*]] = add i8 [[V4:%.*]], [[V5:%.*]] -; CHECK-NEXT: br i1 [[C3]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT]] +; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[C1]], [[C3]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT:%.*]] ; CHECK: final_left: ; CHECK-NEXT: call void @sideeffect0() ; CHECK-NEXT: call void @use8(i8 [[V2_ADJ]]) @@ -688,13 +687,16 @@ define void @two_preds_with_extra_op_externally_used_only_after_cond(i8 %v0, i8 ; CHECK-NEXT: br i1 [[C1]], label [[FINAL_LEFT:%.*]], label [[DISPATCH:%.*]] ; CHECK: pred1: ; CHECK-NEXT: [[C2:%.*]] = icmp eq i8 [[V2:%.*]], 0 -; CHECK-NEXT: br i1 [[C2]], label [[DISPATCH]], label [[FINAL_RIGHT:%.*]] -; CHECK: dispatch: ; CHECK-NEXT: [[C3:%.*]] = icmp eq i8 [[V3:%.*]], 0 ; CHECK-NEXT: [[V3_ADJ:%.*]] = add i8 [[V4:%.*]], [[V5:%.*]] -; CHECK-NEXT: br i1 [[C3]], label [[FINAL_LEFT]], label [[FINAL_RIGHT]] +; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[C2]], [[C3]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[FINAL_LEFT]], label [[FINAL_RIGHT:%.*]] +; CHECK: dispatch: +; CHECK-NEXT: [[DOTOLD:%.*]] = icmp eq i8 [[V3]], 0 +; CHECK-NEXT: [[DOTOLD1:%.*]] = add i8 [[V4]], [[V5]] +; CHECK-NEXT: br i1 [[DOTOLD]], label [[FINAL_LEFT]], label [[FINAL_RIGHT]] ; CHECK: final_left: -; CHECK-NEXT: [[MERGE_LEFT:%.*]] = phi i8 [ [[V3_ADJ]], [[DISPATCH]] ], [ 0, [[PRED0]] ] +; CHECK-NEXT: [[MERGE_LEFT:%.*]] = phi i8 [ [[DOTOLD1]], [[DISPATCH]] ], [ 0, [[PRED0]] ], [ [[V3_ADJ]], [[PRED1]] ] ; CHECK-NEXT: call void @use8(i8 [[MERGE_LEFT]]) ; CHECK-NEXT: call void @sideeffect0() ; CHECK-NEXT: ret void -- 2.11.0