From 1db780ba2244edf46f1f6880e5730511a9d5f12d Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Sat, 29 Mar 2014 08:22:29 +0000 Subject: [PATCH] CodeGenPrep: wrangle IR to exploit AArch64 tbz/tbnz inst. Given IR like: %bit = and %val, #imm-with-1-bit-set %tst = icmp %bit, 0 br i1 %tst, label %true, label %false some targets can emit just a single instruction (tbz/tbnz in the AArch64 case). However, with ISel acting at the basic-block level, all three instructions need to be together for this to be possible. This adds another transformation to CodeGenPrep to expose these opportunities, if targets opt in via the hook. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205086 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 19 +++++++++ lib/CodeGen/CodeGenPrepare.cpp | 80 ++++++++++++++++++++++++++++++++++++ lib/CodeGen/TargetLoweringBase.cpp | 1 + 3 files changed, 100 insertions(+) diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 7f35244f196..dbb0be410a8 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -222,6 +222,21 @@ public: return true; } + /// \brief Return if the target supports combining a + /// chain like: + /// \code + /// %andResult = and %val1, #imm-with-one-bit-set; + /// %icmpResult = icmp %andResult, 0 + /// br i1 %icmpResult, label %dest1, label %dest2 + /// \endcode + /// into a single machine instruction of a form like: + /// \code + /// brOnBitSet %register, #bitNumber, dest + /// \endcode + bool isMaskAndBranchFoldingLegal() const { + return MaskAndBranchFoldingIsLegal; + } + /// Return the ValueType of the result of SETCC operations. Also used to /// obtain the target's preferred type for the condition operand of SELECT and /// BRCOND nodes. In the case of BRCOND the argument passed is MVT::Other @@ -1724,6 +1739,10 @@ protected: /// the branch is usually predicted right. bool PredictableSelectIsExpensive; + /// MaskAndBranchFoldingIsLegal - Indicates if the target supports folding + /// a mask of a single bit, a compare, and a branch into a single instruction. + bool MaskAndBranchFoldingIsLegal; + protected: /// Return true if the value types that can be represented by the specified /// register class are all legal. diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index a3961e80031..72d6ac56362 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -60,6 +60,7 @@ STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); STATISTIC(NumRetsDup, "Number of return instructions duplicated"); STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); +STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches"); static cl::opt DisableBranchOpts( "disable-cgp-branch-opts", cl::Hidden, cl::init(false), @@ -69,6 +70,10 @@ static cl::opt DisableSelectToBranch( "disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion.")); +static cl::opt EnableAndCmpSinking( + "enable-andcmp-sinking", cl::Hidden, cl::init(true), + cl::desc("Enable sinkinig and/cmp into branches.")); + namespace { typedef SmallPtrSet SetOfInstrs; typedef DenseMap InstrToOrigTy; @@ -135,6 +140,7 @@ typedef DenseMap InstrToOrigTy; bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI); bool DupRetToEnableTailCallOpts(BasicBlock *BB); bool PlaceDbgValues(Function &F); + bool sinkAndCmp(Function &F); }; } @@ -190,6 +196,13 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // find a node corresponding to the value. EverMadeChange |= PlaceDbgValues(F); + // If there is a mask, compare against zero, and branch that can be combined + // into a single target instruction, push the mask and compare into branch + // users. Do this before OptimizeBlock -> OptimizeInst -> + // OptimizeCmpExpression, which perturbs the pattern being searched for. + if (!DisableBranchOpts) + EverMadeChange |= sinkAndCmp(F); + bool MadeChange = true; while (MadeChange) { MadeChange = false; @@ -2926,3 +2939,70 @@ bool CodeGenPrepare::PlaceDbgValues(Function &F) { } return MadeChange; } + +// If there is a sequence that branches based on comparing a single bit +// against zero that can be combined into a single instruction, and the +// target supports folding these into a single instruction, sink the +// mask and compare into the branch uses. Do this before OptimizeBlock -> +// OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being +// searched for. +bool CodeGenPrepare::sinkAndCmp(Function &F) { + if (!EnableAndCmpSinking) + return false; + if (!TLI || !TLI->isMaskAndBranchFoldingLegal()) + return false; + bool MadeChange = false; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { + BasicBlock *BB = I++; + + // Does this BB end with the following? + // %andVal = and %val, #single-bit-set + // %icmpVal = icmp %andResult, 0 + // br i1 %cmpVal label %dest1, label %dest2" + BranchInst *Brcc = dyn_cast(BB->getTerminator()); + if (!Brcc || !Brcc->isConditional()) + continue; + ICmpInst *Cmp = dyn_cast(Brcc->getOperand(0)); + if (!Cmp || Cmp->getParent() != BB) + continue; + ConstantInt *Zero = dyn_cast(Cmp->getOperand(1)); + if (!Zero || !Zero->isZero()) + continue; + Instruction *And = dyn_cast(Cmp->getOperand(0)); + if (!And || And->getOpcode() != Instruction::And || And->getParent() != BB) + continue; + ConstantInt* Mask = dyn_cast(And->getOperand(1)); + if (!Mask || !Mask->getUniqueInteger().isPowerOf2()) + continue; + DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB->dump()); + + // Push the "and; icmp" for any users that are conditional branches. + // Since there can only be one branch use per BB, we don't need to keep + // track of which BBs we insert into. + for (Value::use_iterator UI = Cmp->use_begin(), E = Cmp->use_end(); + UI != E; ) { + Use &TheUse = *UI; + // Find brcc use. + BranchInst *BrccUser = dyn_cast(*UI); + ++UI; + if (!BrccUser || !BrccUser->isConditional()) + continue; + BasicBlock *UserBB = BrccUser->getParent(); + if (UserBB == BB) continue; + DEBUG(dbgs() << "found Brcc use\n"); + + // Sink the "and; icmp" to use. + MadeChange = true; + BinaryOperator *NewAnd = + BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "", + BrccUser); + CmpInst *NewCmp = + CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero, + "", BrccUser); + TheUse = NewCmp; + ++NumAndCmpsMoved; + DEBUG(BrccUser->getParent()->dump()); + } + } + return MadeChange; +} diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index f1b79691913..d0c02002c3d 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -679,6 +679,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, Pow2DivIsCheap = false; JumpIsExpensive = false; PredictableSelectIsExpensive = false; + MaskAndBranchFoldingIsLegal = false; StackPointerRegisterToSaveRestore = 0; ExceptionPointerRegister = 0; ExceptionSelectorRegister = 0; -- 2.11.0