From 3fad2bcd25d5f46d98ea7e41c6654833f197b960 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Sun, 25 Aug 2013 22:33:42 +0000 Subject: [PATCH] [PowerPC] Add fast-isel branch and compare selection. First chunk of actual fast-isel selection code. This handles direct and indirect branches, as well as feeding compares for direct branches. PPCFastISel::PPCEmitIntExt() is just roughed in and will be expanded in a future patch. This also corrects a problem with selection for constant pool entries in JIT mode or with small code model. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189202 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCFastISel.cpp | 281 ++++++++++++++++++++++++++- test/CodeGen/PowerPC/fast-isel-br-const.ll | 43 ++++ test/CodeGen/PowerPC/fast-isel-indirectbr.ll | 15 ++ 3 files changed, 330 insertions(+), 9 deletions(-) create mode 100644 test/CodeGen/PowerPC/fast-isel-br-const.ll create mode 100644 test/CodeGen/PowerPC/fast-isel-indirectbr.ll diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 8cbf1fb6c07..ebc705704d9 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -89,9 +89,19 @@ class PPCFastISel : public FastISel { virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, const LoadInst *LI); virtual bool FastLowerArguments(); + virtual unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm); + + // Instruction selection routines. + private: + bool SelectBranch(const Instruction *I); + bool SelectIndirectBr(const Instruction *I); // Utility routines. 
private:
+    bool PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
+                    bool IsZExt, unsigned DestReg);
+    bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+                       unsigned DestReg, bool IsZExt);
     unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
     unsigned PPCMaterializeInt(const Constant *C, MVT VT);
     unsigned PPCMaterialize32BitInt(int64_t Imm,
@@ -106,10 +116,257 @@ class PPCFastISel : public FastISel {
 
 } // end anonymous namespace
 
+// Map an IR compare predicate onto the PPC::Predicate that a conditional
+// branch tests after a single compare.  Signed and unsigned integer
+// predicates share a result here; PPCEmitCmp picks the matching compare
+// opcode.  An empty Optional is returned for predicates that are not
+// handled.
+static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
+  switch (Pred) {
+    // These are not representable with any single compare.
+    case CmpInst::FCMP_FALSE:
+    case CmpInst::FCMP_UEQ:
+    case CmpInst::FCMP_UGT:
+    case CmpInst::FCMP_UGE:
+    case CmpInst::FCMP_ULT:
+    case CmpInst::FCMP_ULE:
+    case CmpInst::FCMP_UNE:
+    case CmpInst::FCMP_TRUE:
+    default:
+      return Optional<PPC::Predicate>();
+
+    case CmpInst::FCMP_OEQ:
+    case CmpInst::ICMP_EQ:
+      return PPC::PRED_EQ;
+
+    case CmpInst::FCMP_OGT:
+    case CmpInst::ICMP_UGT:
+    case CmpInst::ICMP_SGT:
+      return PPC::PRED_GT;
+
+    case CmpInst::FCMP_OGE:
+    case CmpInst::ICMP_UGE:
+    case CmpInst::ICMP_SGE:
+      return PPC::PRED_GE;
+
+    case CmpInst::FCMP_OLT:
+    case CmpInst::ICMP_ULT:
+    case CmpInst::ICMP_SLT:
+      return PPC::PRED_LT;
+
+    case CmpInst::FCMP_OLE:
+    case CmpInst::ICMP_ULE:
+    case CmpInst::ICMP_SLE:
+      return PPC::PRED_LE;
+
+    case CmpInst::FCMP_ONE:
+    case CmpInst::ICMP_NE:
+      return PPC::PRED_NE;
+
+    case CmpInst::FCMP_ORD:
+      return PPC::PRED_NU;
+
+    case CmpInst::FCMP_UNO:
+      return PPC::PRED_UN;
+  }
+}
+
+// Attempt to fast-select a branch instruction.  Both successors are read
+// up front, so the branch is assumed to be conditional.  A condition that
+// comes from a compare instruction becomes a compare followed by BCC; a
+// constant condition becomes a plain branch to the selected successor.
+// Return false for any other kind of condition.
+bool PPCFastISel::SelectBranch(const Instruction *I) {
+  const BranchInst *BI = cast<BranchInst>(I);
+  MachineBasicBlock *BrBB = FuncInfo.MBB;
+  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+  // For now, just try the simplest case where it's fed by a compare.
+  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+    Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
+    if (!OptPPCPred)
+      return false;
+
+    PPC::Predicate PPCPred = OptPPCPred.getValue();
+
+    // Take advantage of fall-through opportunities.
+    if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+      std::swap(TBB, FBB);
+      PPCPred = PPC::InvertPredicate(PPCPred);
+    }
+
+    unsigned CondReg = createResultReg(&PPC::CRRCRegClass);
+
+    if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
+                    CondReg))
+      return false;
+
+    BuildMI(*BrBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCC))
+      .addImm(PPCPred).addReg(CondReg).addMBB(TBB);
+    FastEmitBranch(FBB, DL);
+    FuncInfo.MBB->addSuccessor(TBB);
+    return true;
+
+  } else if (const ConstantInt *CI =
+             dyn_cast<ConstantInt>(BI->getCondition())) {
+    uint64_t Imm = CI->getZExtValue();
+    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
+    FastEmitBranch(Target, DL);
+    return true;
+  }
+
+  // FIXME: ARM looks for a case where the block containing the compare
+  // has been split from the block containing the branch.  If this happens,
+  // there is a vreg available containing the result of the compare.  I'm
+  // not sure we can do much, as we've lost the predicate information with
+  // the compare instruction -- we have a 4-bit CR but don't know which bit
+  // to test here.
+  return false;
+}
+
+// Attempt to emit a compare of the two source values into DestReg.  Signed
+// and unsigned comparisons are supported; IsZExt selects the unsigned form.
+// A constant second operand that fits in 16 bits is folded into the compare
+// as an immediate.  Return false if we can't handle it.
+bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
+                             bool IsZExt, unsigned DestReg) {
+  Type *Ty = SrcValue1->getType();
+  EVT SrcEVT = TLI.getValueType(Ty, true);
+  if (!SrcEVT.isSimple())
+    return false;
+  MVT SrcVT = SrcEVT.getSimpleVT();
+
+  // See if operand 2 is an immediate encodeable in the compare.
+  // FIXME: Operands are not in canonical order at -O0, so an immediate
+  // operand in position 1 is a lost opportunity for now.  We are
+  // similar to ARM in this regard.
+  // Keep the candidate in a fixed-width 64-bit integer so that an i64
+  // constant cannot be truncated ahead of the 16-bit range check below.
+  int64_t Imm = 0;
+  bool UseImm = false;
+
+  // Only 16-bit integer constants can be represented in compares for
+  // PowerPC.  Others will be materialized into a register.
+  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
+    if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
+        SrcVT == MVT::i8 || SrcVT == MVT::i1) {
+      const APInt &CIVal = ConstInt->getValue();
+      Imm = (IsZExt) ? (int64_t)CIVal.getZExtValue() : CIVal.getSExtValue();
+      if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
+        UseImm = true;
+    }
+  }
+
+  unsigned CmpOpc;
+  bool NeedsExt = false;
+  switch (SrcVT.SimpleTy) {
+    default: return false;
+    case MVT::f32:
+      CmpOpc = PPC::FCMPUS;
+      break;
+    case MVT::f64:
+      CmpOpc = PPC::FCMPUD;
+      break;
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+      NeedsExt = true;
+      // Intentional fall-through.
+    case MVT::i32:
+      if (!UseImm)
+        CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
+      else
+        CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
+      break;
+    case MVT::i64:
+      if (!UseImm)
+        CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
+      else
+        CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
+      break;
+  }
+
+  unsigned SrcReg1 = getRegForValue(SrcValue1);
+  if (SrcReg1 == 0)
+    return false;
+
+  unsigned SrcReg2 = 0;
+  if (!UseImm) {
+    SrcReg2 = getRegForValue(SrcValue2);
+    if (SrcReg2 == 0)
+      return false;
+  }
+
+  if (NeedsExt) {
+    unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
+    if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
+      return false;
+    SrcReg1 = ExtReg;
+
+    if (!UseImm) {
+      unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
+      if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
+        return false;
+      SrcReg2 = ExtReg;
+    }
+  }
+
+  if (!UseImm)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg)
+      .addReg(SrcReg1).addReg(SrcReg2);
+  else
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg)
+      .addReg(SrcReg1).addImm(Imm);
+
+  return true;
+}
+
+// Attempt to emit an integer extend of SrcReg into DestReg.  Both signed
+// and zero extensions are meant to be supported, but nothing is
+// implemented yet: the body names every parameter and always yields false,
+// so PPCEmitCmp currently gives up on i1, i8, and i16 operands.
+bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+                                unsigned DestReg, bool IsZExt) {
+  return (SrcVT == MVT::i8 && SrcReg && DestVT == MVT::i8 && DestReg
+          && IsZExt && false);
+}
+
+// Attempt to fast-select an indirect branch instruction.  The address is
+// moved with MTCTR8 and branched to with BCTR8, and every destination listed
+// on the instruction is added as a successor of the current block.
+bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
+  unsigned AddrReg = getRegForValue(I->getOperand(0));
+  if (AddrReg == 0)
+    return false;
+
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::MTCTR8))
+    .addReg(AddrReg);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCTR8));
+
+  const IndirectBrInst *IB = cast<IndirectBrInst>(I);
+  for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
+    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]);
+
+  return true;
+}
+
 // Attempt to fast-select an instruction that wasn't handled by
-// the table-generated machinery.  TBD.
+// the table-generated machinery.
bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {
-  return I && false;
+  // Dispatch on the IR opcode; unlisted opcodes fall out and return false.
+  switch (I->getOpcode()) {
+    case Instruction::Br:
+      return SelectBranch(I);
+    case Instruction::IndirectBr:
+      return SelectIndirectBr(I);
+    // Here add other flavors of Instruction::XXX that automated
+    // cases don't catch.  For example, switches are terminators
+    // that aren't yet handled.
+    default:
+      break;
+  }
+  return false;
 }
 
 // Materialize a floating-point constant into a register, and return
@@ -131,15 +372,18 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
     MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
     (VT == MVT::f32) ? 4 : 8, Align);
 
-  // For small code model, generate a LDtocCPT.
-  if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault)
+  unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
+  unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+
+  // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
+  if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) {
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocCPT),
-            DestReg)
-      .addConstantPoolIndex(Idx).addReg(PPC::X2).addMemOperand(MMO);
-  else {
+            TmpReg)
+      .addConstantPoolIndex(Idx).addReg(PPC::X2);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+      .addImm(0).addReg(TmpReg).addMemOperand(MMO);
+  } else {
     // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
-    unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
-    unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA),
             TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
@@ -312,6 +556,25 @@ bool PPCFastISel::FastLowerArguments() {
   return false;
 }
 
+// Handle materializing integer constants into a register.  This is not
+// automatically generated for PowerPC, so must be explicitly created here.
+unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
+  // Only ISD::Constant is handled here; any other opcode returns 0.
+  if (Opc != ISD::Constant)
+    return 0;
+  // Likewise return 0 unless VT is one of the integer types listed below.
+  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
+      VT != MVT::i8 && VT != MVT::i1)
+    return 0;
+  // i64 is materialized into G8RC; every narrower type goes into GPRC.
+  const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
+                                   &PPC::GPRCRegClass);
+  if (VT == MVT::i64)
+    return PPCMaterialize64BitInt(Imm, RC);
+  else
+    return PPCMaterialize32BitInt(Imm, RC);
+}
+
 namespace llvm {
   // Create the fast instruction selector for PowerPC64 ELF.
   FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
diff --git a/test/CodeGen/PowerPC/fast-isel-br-const.ll b/test/CodeGen/PowerPC/fast-isel-br-const.ll
new file mode 100644
index 00000000000..2cfb8a22574
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-br-const.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+
+define i32 @t1(i32 %a, i32 %b) nounwind uwtable ssp {
+entry:
+; ELF64: t1
+  %x = add i32 %a, %b
+  br i1 1, label %if.then, label %if.else
+; ELF64-NOT: b {{\.?}}LBB0_1
+
+if.then:                                          ; preds = %entry
+  call void @foo1()
+  br label %if.end7
+
+if.else:                                          ; preds = %entry
+  br i1 0, label %if.then2, label %if.else3
+; ELF64: b {{\.?}}LBB0_4
+
+if.then2:                                         ; preds = %if.else
+  call void @foo2()
+  br label %if.end6
+
+if.else3:                                         ; preds = %if.else
+  %y = sub i32 %a, %b
+  br i1 1, label %if.then5, label %if.end
+; ELF64-NOT: b {{\.?}}LBB0_5
+
+if.then5:                                         ; preds = %if.else3
+  call void @foo1()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then5, %if.else3
+  br label %if.end6
+
+if.end6:                                          ; preds = %if.end, %if.then2
+  br label %if.end7
+
+if.end7:                                          ; preds = %if.end6, %if.then
+  ret i32 0
+}
+
+declare void @foo1()
+
+declare void @foo2()
diff --git a/test/CodeGen/PowerPC/fast-isel-indirectbr.ll b/test/CodeGen/PowerPC/fast-isel-indirectbr.ll
new file mode 100644
index 00000000000..88ccf918ae9
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-indirectbr.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+
+define void @t1(i8* %x) {
+entry:
+; ELF64: t1
+  br label %L0
+
+L0:
+  br label %L1
+
+L1:
+  indirectbr i8* %x, [ label %L0, label %L1 ]
+; ELF64: mtctr 3
+; ELF64: bctr
+}
-- 
2.11.0