From: Juergen Ributzka Date: Thu, 18 Sep 2014 02:44:13 +0000 (+0000) Subject: [FastISel][AArch64] Fold bit test and branch into TBZ and TBNZ. X-Git-Tag: android-x86-7.1-r4~57419 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=710fc316fbbf0c58cb550064070a602e218f0d9a;p=android-x86%2Fexternal-llvm.git [FastISel][AArch64] Fold bit test and branch into TBZ and TBNZ. Teach selectBranch to fold bit test and branch into a single instruction (TBZ or TBNZ). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218010 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 347e0364d1d..d25956c7bc4 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -1796,7 +1796,7 @@ static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { /// \brief Check if the comparison against zero and the following branch can be /// folded into a single instruction (CBZ or CBNZ). -static bool canFoldZeroIntoBranch(const CmpInst *CI) { +static bool canFoldZeroCheckIntoBranch(const CmpInst *CI) { CmpInst::Predicate Predicate = CI->getPredicate(); if ((Predicate != CmpInst::ICMP_EQ) && (Predicate != CmpInst::ICMP_NE)) return false; @@ -1854,7 +1854,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { } // Try to optimize comparisons against zero. 
- if (canFoldZeroIntoBranch(CI)) { + if (canFoldZeroCheckIntoBranch(CI)) { const Value *LHS = CI->getOperand(0); const Value *RHS = CI->getOperand(1); @@ -1863,12 +1863,33 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { if (C->isNullValue()) std::swap(LHS, RHS); - static const unsigned OpcTable[2][2] = { - {AArch64::CBZW, AArch64::CBZX }, {AArch64::CBNZW, AArch64::CBNZX} + int TestBit = -1; + if (const auto *AI = dyn_cast<BinaryOperator>(LHS)) + if (AI->getOpcode() == Instruction::And) { + const Value *AndLHS = AI->getOperand(0); + const Value *AndRHS = AI->getOperand(1); + + if (const auto *C = dyn_cast<ConstantInt>(AndLHS)) + if (C->getValue().isPowerOf2()) + std::swap(AndLHS, AndRHS); + + if (const auto *C = dyn_cast<ConstantInt>(AndRHS)) + if (C->getValue().isPowerOf2()) { + TestBit = C->getValue().logBase2(); + LHS = AndLHS; + } + } + + static const unsigned OpcTable[2][2][2] = { + { {AArch64::CBZW, AArch64::CBZX }, + {AArch64::CBNZW, AArch64::CBNZX} }, + { {AArch64::TBZW, AArch64::TBZX }, + {AArch64::TBNZW, AArch64::TBNZX} } }; + bool IsBitTest = TestBit != -1; bool IsCmpNE = Predicate == CmpInst::ICMP_NE; bool Is64Bit = LHS->getType()->isIntegerTy(64); - unsigned Opc = OpcTable[IsCmpNE][Is64Bit]; + unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; unsigned SrcReg = getRegForValue(LHS); if (!SrcReg) @@ -1876,9 +1897,12 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { bool SrcIsKill = hasTrivialKill(LHS); // Emit the combined compare and branch instruction. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) - .addReg(SrcReg, getKillRegState(SrcIsKill)) - .addMBB(TBB); + MachineInstrBuilder MIB = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) + .addReg(SrcReg, getKillRegState(SrcIsKill)); + if (IsBitTest) + MIB.addImm(TestBit); + MIB.addMBB(TBB); // Obtain the branch weight and add the TrueBB to the successor list. 
uint32_t BranchWeight = 0; diff --git a/test/CodeGen/AArch64/fast-isel-tbz.ll b/test/CodeGen/AArch64/fast-isel-tbz.ll new file mode 100644 index 00000000000..313abd31829 --- /dev/null +++ b/test/CodeGen/AArch64/fast-isel-tbz.ll @@ -0,0 +1,50 @@ +; RUN: llc -fast-isel -fast-isel-abort -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s + +define i32 @icmp_eq_i8(i8 zeroext %a) { +; CHECK-LABEL: icmp_eq_i8 +; CHECK: tbz w0, #0, {{LBB.+_2}} + %1 = and i8 %a, 1 + %2 = icmp eq i8 %1, 0 + br i1 %2, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_eq_i16(i16 zeroext %a) { +; CHECK-LABEL: icmp_eq_i16 +; CHECK: tbz w0, #1, {{LBB.+_2}} + %1 = and i16 %a, 2 + %2 = icmp eq i16 %1, 0 + br i1 %2, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_eq_i32(i32 %a) { +; CHECK-LABEL: icmp_eq_i32 +; CHECK: tbz w0, #2, {{LBB.+_2}} + %1 = and i32 %a, 4 + %2 = icmp eq i32 %1, 0 + br i1 %2, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} + +define i32 @icmp_eq_i64(i64 %a) { +; CHECK-LABEL: icmp_eq_i64 +; CHECK: tbz x0, #3, {{LBB.+_2}} + %1 = and i64 %a, 8 + %2 = icmp eq i64 %1, 0 + br i1 %2, label %bb1, label %bb2 +bb2: + ret i32 1 +bb1: + ret i32 0 +} +