X-Git-Url: http://git.osdn.net/view?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMFrameLowering.cpp;h=16b54e8848c23328b9555a4e582aa8921608fbf1;hb=e3e43d9d574cf0a829e9a58525372ba0868a3292;hp=831c2eb7984099e09dcb4c511075e9baed73df9e;hpb=6f059afde6e3b689006c7491640fbdedc2dcfdf7;p=android-x86%2Fexternal-llvm.git diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 831c2eb7984..16b54e8848c 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -16,18 +16,51 @@ #include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include 
"llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG_TYPE "arm-frame-lowering" using namespace llvm; @@ -43,23 +76,29 @@ ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti) : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4), STI(sti) {} +bool ARMFrameLowering::noFramePointerElim(const MachineFunction &MF) const { + // iOS always has a FP for backtracking, force other targets to keep their FP + // when doing FastISel. The emitted code is currently superior, and in cases + // like test-suite's lencod FastISel isn't quite correct when FP is eliminated. + return TargetFrameLowering::noFramePointerElim(MF) || + MF.getSubtarget().useFastISel(); +} + /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); - // iOS requires FP not to be clobbered for backtracing purpose. - if (STI.isTargetIOS()) + // ABI-required frame pointer. + if (MF.getTarget().Options.DisableFramePointerElim(MF)) return true; - const MachineFrameInfo *MFI = MF.getFrameInfo(); - // Always eliminate non-leaf frame pointers. - return ((MF.getTarget().Options.DisableFramePointerElim(MF) && - MFI->hasCalls()) || - RegInfo->needsStackRealignment(MF) || - MFI->hasVarSizedObjects() || - MFI->isFrameAddressTaken()); + // Frame pointer required for use within this function. + return (RegInfo->needsStackRealignment(MF) || + MFI.hasVarSizedObjects() || + MFI.isFrameAddressTaken()); } /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is @@ -68,8 +107,8 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { /// add/sub sp brackets around call sites. 
Returns true if the call frame is /// included as part of the stack frame. bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { - const MachineFrameInfo *FFI = MF.getFrameInfo(); - unsigned CFSize = FFI->getMaxCallFrameSize(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + unsigned CFSize = MFI.getMaxCallFrameSize(); // It's not always a good idea to include the call frame as part of the // stack frame. ARM (especially Thumb) has small immediate offset to // address the stack frame. So a large call frame can cause poor codegen @@ -77,7 +116,7 @@ bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12 return false; - return !MF.getFrameInfo()->hasVarSizedObjects(); + return !MFI.hasVarSizedObjects(); } /// canSimplifyCallFramePseudos - If there is a reserved call frame, the @@ -86,38 +125,35 @@ bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { /// even when FP is available in Thumb2 mode. bool ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { - return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects(); + return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects(); } -static bool isCSRestore(MachineInstr *MI, - const ARMBaseInstrInfo &TII, +static bool isCSRestore(MachineInstr &MI, const ARMBaseInstrInfo &TII, const MCPhysReg *CSRegs) { // Integer spill area is handled with "pop". - if (isPopOpcode(MI->getOpcode())) { + if (isPopOpcode(MI.getOpcode())) { // The first two operands are predicates. The last two are // imp-def and imp-use of SP. Check everything in between. 
- for (int i = 5, e = MI->getNumOperands(); i != e; ++i) - if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs)) + for (int i = 5, e = MI.getNumOperands(); i != e; ++i) + if (!isCalleeSavedRegister(MI.getOperand(i).getReg(), CSRegs)) return false; return true; } - if ((MI->getOpcode() == ARM::LDR_POST_IMM || - MI->getOpcode() == ARM::LDR_POST_REG || - MI->getOpcode() == ARM::t2LDR_POST) && - isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) && - MI->getOperand(1).getReg() == ARM::SP) + if ((MI.getOpcode() == ARM::LDR_POST_IMM || + MI.getOpcode() == ARM::LDR_POST_REG || + MI.getOpcode() == ARM::t2LDR_POST) && + isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs) && + MI.getOperand(1).getReg() == ARM::SP) return true; return false; } -static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, DebugLoc dl, - const ARMBaseInstrInfo &TII, unsigned DestReg, - unsigned SrcReg, int NumBytes, - unsigned MIFlags = MachineInstr::NoFlags, - ARMCC::CondCodes Pred = ARMCC::AL, - unsigned PredReg = 0) { +static void emitRegPlusImmediate( + bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg, + unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags, + ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) { if (isARM) emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes, Pred, PredReg, TII, MIFlags); @@ -127,7 +163,7 @@ static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB, } static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, DebugLoc dl, + MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags, ARMCC::CondCodes Pred = ARMCC::AL, @@ -136,9 +172,9 @@ static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MIFlags, Pred, PredReg); } -static int 
sizeOfSPAdjustment(const MachineInstr *MI) { +static int sizeOfSPAdjustment(const MachineInstr &MI) { int RegSize; - switch (MI->getOpcode()) { + switch (MI.getOpcode()) { case ARM::VSTMDDB_UPD: RegSize = 8; break; @@ -156,20 +192,25 @@ static int sizeOfSPAdjustment(const MachineInstr *MI) { int count = 0; // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ // pred) so the list starts at 4. - for (int i = MI->getNumOperands() - 1; i >= 4; --i) + for (int i = MI.getNumOperands() - 1; i >= 4; --i) count += RegSize; return count; } static bool WindowsRequiresStackProbe(const MachineFunction &MF, size_t StackSizeInBytes) { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - if (MFI->getStackProtectorIndex() > 0) - return StackSizeInBytes >= 4080; - return StackSizeInBytes >= 4096; + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const Function *F = MF.getFunction(); + unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096; + if (F->hasFnAttribute("stack-probe-size")) + F->getFnAttribute("stack-probe-size") + .getValueAsString() + .getAsInteger(0, StackProbeSize); + return StackSizeInBytes >= StackProbeSize; } namespace { + struct StackAdjustingInsts { struct InstInfo { MachineBasicBlock::iterator I; @@ -186,21 +227,22 @@ struct StackAdjustingInsts { } void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) { - auto Info = std::find_if(Insts.begin(), Insts.end(), - [&](InstInfo &Info) { return Info.I == I; }); + auto Info = + llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; }); assert(Info != Insts.end() && "invalid sp adjusting instruction"); Info->SPAdjust += ExtraBytes; } - void emitDefCFAOffsets(MachineModuleInfo &MMI, MachineBasicBlock &MBB, - DebugLoc dl, const ARMBaseInstrInfo &TII, bool HasFP) { + void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl, + const ARMBaseInstrInfo &TII, bool HasFP) { + MachineFunction &MF = *MBB.getParent(); unsigned CFAOffset = 0; for (auto &Info 
: Insts) { if (HasFP && !Info.BeforeFPSet) return; CFAOffset -= Info.SPAdjust; - unsigned CFIIndex = MMI.addFrameInst( + unsigned CFIIndex = MF.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, std::next(Info.I), dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) @@ -209,30 +251,112 @@ struct StackAdjustingInsts { } } }; + +} // end anonymous namespace + +/// Emit an instruction sequence that will align the address in +/// register Reg by zero-ing out the lower bits. For versions of the +/// architecture that support Neon, this must be done in a single +/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a +/// single instruction. That function only gets called when optimizing +/// spilling of D registers on a core with the Neon instruction set +/// present. +static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, + const TargetInstrInfo &TII, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, const unsigned Reg, + const unsigned Alignment, + const bool MustBeSingleInstruction) { + const ARMSubtarget &AST = + static_cast(MF.getSubtarget()); + const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops(); + const unsigned AlignMask = Alignment - 1; + const unsigned NrBitsToZero = countTrailingZeros(Alignment); + assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported"); + if (!AFI->isThumbFunction()) { + // if the BFC instruction is available, use that to zero the lower + // bits: + // bfc Reg, #0, log2(Alignment) + // otherwise use BIC, if the mask to zero the required number of bits + // can be encoded in the bic immediate field + // bic Reg, Reg, Alignment-1 + // otherwise, emit + // lsr Reg, Reg, log2(Alignment) + // lsl Reg, Reg, log2(Alignment) + if (CanUseBFC) { + BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg) + .addReg(Reg, RegState::Kill) + .addImm(~AlignMask) + .add(predOps(ARMCC::AL)); + } else if (AlignMask <= 255) { + BuildMI(MBB, MBBI, DL, 
TII.get(ARM::BICri), Reg) + .addReg(Reg, RegState::Kill) + .addImm(AlignMask) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + } else { + assert(!MustBeSingleInstruction && + "Shouldn't call emitAligningInstructions demanding a single " + "instruction to be emitted for large stack alignment for a target " + "without BFC."); + BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg) + .addReg(Reg, RegState::Kill) + .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero)) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg) + .addReg(Reg, RegState::Kill) + .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero)) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + } + } else { + // Since this is only reached for Thumb-2 targets, the BFC instruction + // should always be available. + assert(CanUseBFC); + BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg) + .addReg(Reg, RegState::Kill) + .addImm(~AlignMask) + .add(predOps(ARMCC::AL)); + } } -void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); +/// We need the offset of the frame pointer relative to other MachineFrameInfo +/// offsets which are encoded relative to SP at function begin. +/// See also emitPrologue() for how the FP is set up. +/// Unfortunately we cannot determine this value in determineCalleeSaves() yet +/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use +/// this to produce a conservative estimate that we check in an assert() later. +static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI) { + // This is a conservative estimation: Assume the frame pointer being r7 and + // pc("r15") up to r8 getting spilled before (= 8 registers). 
+ return -AFI.getArgRegsSaveSize() - (8 * 4); +} + +void ARMFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo(); MachineModuleInfo &MMI = MF.getMMI(); MCContext &Context = MMI.getContext(); const TargetMachine &TM = MF.getTarget(); const MCRegisterInfo *MRI = Context.getRegisterInfo(); - const ARMBaseRegisterInfo *RegInfo = static_cast( - TM.getSubtargetImpl()->getRegisterInfo()); - const ARMBaseInstrInfo &TII = *static_cast( - TM.getSubtargetImpl()->getInstrInfo()); + const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo(); + const ARMBaseInstrInfo &TII = *STI.getInstrInfo(); assert(!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned Align = - TM.getSubtargetImpl()->getFrameLowering()->getStackAlignment(); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); - unsigned NumBytes = MFI->getStackSize(); - const std::vector &CSI = MFI->getCalleeSavedInfo(); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + unsigned Align = STI.getFrameLowering()->getStackAlignment(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); + unsigned NumBytes = MFI.getStackSize(); + const std::vector &CSI = MFI.getCalleeSavedInfo(); + + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. + DebugLoc dl; + unsigned FramePtr = RegInfo->getFrameRegister(MF); // Determine the sizes of each callee-save spill areas and record which frame @@ -247,6 +371,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { return; StackAdjustingInsts DefCFAOffsetCandidates; + bool HasFP = hasFP(MF); // Allocate the vararg register save area. 
if (ArgRegsSaveSize) { @@ -263,6 +388,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes - ArgRegsSaveSize, true); } + DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP); return; } @@ -276,11 +402,11 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.isTargetDarwin()) { + if (STI.splitFramePushPop(MF)) { GPRCS2Size += 4; break; } - // fallthrough + LLVM_FALLTHROUGH; case ARM::R0: case ARM::R1: case ARM::R2: @@ -311,7 +437,6 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { } // Determine starting offsets of spill areas. - bool HasFP = hasFP(MF); unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size; unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size; unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U; @@ -319,9 +444,11 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; int FramePtrOffsetInPush = 0; if (HasFP) { - FramePtrOffsetInPush = - MFI->getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize; - AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + + int FPOffset = MFI.getObjectOffset(FramePtrSpillFI); + assert(getMaxFPOffset(*MF.getFunction(), *AFI) <= FPOffset && + "Max FP estimation is wrong"); + FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize; + AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + NumBytes); } AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); @@ -338,7 +465,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // .cfi_offset operations will reflect that. 
if (DPRGapSize) { assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs"); - if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, DPRGapSize)) + if (LastPush != MBB.end() && + tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize)) DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize); else { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize, @@ -352,7 +480,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) { - DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(MBBI)); + DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI)); LastPush = MBBI++; } } @@ -364,7 +492,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // leaves the stack pointer pointing to the DPRCS2 area. // // Adjust NumBytes to represent the stack slots below the DPRCS2 area. - NumBytes += MFI->getObjectOffset(D8SpillFI); + NumBytes += MFI.getObjectOffset(D8SpillFI); } else NumBytes = DPRCSOffset; @@ -372,9 +500,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { uint32_t NumWords = NumBytes >> 2; if (NumWords < 65536) - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4) - .addImm(NumWords) - .setMIFlags(MachineInstr::FrameSetup)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4) + .addImm(NumWords) + .setMIFlags(MachineInstr::FrameSetup) + .add(predOps(ARMCC::AL)); else BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4) .addImm(NumWords) @@ -386,10 +515,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { case CodeModel::Default: case CodeModel::Kernel: BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL)) - .addImm((unsigned)ARMCC::AL).addReg(0) - .addExternalSymbol("__chkstk") - .addReg(ARM::R4, RegState::Implicit) - .setMIFlags(MachineInstr::FrameSetup); + .add(predOps(ARMCC::AL)) + 
.addExternalSymbol("__chkstk") + .addReg(ARM::R4, RegState::Implicit) + .setMIFlags(MachineInstr::FrameSetup); break; case CodeModel::Large: case CodeModel::JITDefault: @@ -398,24 +527,26 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { .setMIFlags(MachineInstr::FrameSetup); BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr)) - .addImm((unsigned)ARMCC::AL).addReg(0) - .addReg(ARM::R12, RegState::Kill) - .addReg(ARM::R4, RegState::Implicit) - .setMIFlags(MachineInstr::FrameSetup); + .add(predOps(ARMCC::AL)) + .addReg(ARM::R12, RegState::Kill) + .addReg(ARM::R4, RegState::Implicit) + .setMIFlags(MachineInstr::FrameSetup); break; } - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), - ARM::SP) - .addReg(ARM::SP, RegState::Define) - .addReg(ARM::R4, RegState::Kill) - .setMIFlags(MachineInstr::FrameSetup))); + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP) + .addReg(ARM::SP, RegState::Kill) + .addReg(ARM::R4, RegState::Kill) + .setMIFlags(MachineInstr::FrameSetup) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); NumBytes = 0; } if (NumBytes) { // Adjust SP after all the callee-save spills. - if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes)) + if (AFI->getNumAlignedDPRCS2Regs() == 0 && + tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes)) DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes); else { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, @@ -443,13 +574,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // that push. 
if (HasFP) { MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push); - unsigned PushSize = sizeOfSPAdjustment(GPRCS1Push); + unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push); emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII, FramePtr, ARM::SP, PushSize + FramePtrOffsetInPush, MachineInstr::FrameSetup); if (FramePtrOffsetInPush + PushSize != 0) { - unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa( nullptr, MRI->getDwarfRegNum(FramePtr, true), -(ArgRegsSaveSize - FramePtrOffsetInPush))); BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) @@ -457,7 +588,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { .setMIFlags(MachineInstr::FrameSetup); } else { unsigned CFIIndex = - MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( + MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( nullptr, MRI->getDwarfRegNum(FramePtr, true))); BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) @@ -480,9 +611,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.isTargetDarwin()) + if (STI.splitFramePushPop(MF)) break; - // fallthrough + LLVM_FALLTHROUGH; case ARM::R0: case ARM::R1: case ARM::R2: @@ -492,8 +623,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::R6: case ARM::R7: case ARM::LR: - CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI))); + CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); @@ -513,10 +644,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::R10: case ARM::R11: 
case ARM::R12: - if (STI.isTargetDarwin()) { + if (STI.splitFramePushPop(MF)) { unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); - unsigned Offset = MFI->getObjectOffset(FI); - unsigned CFIIndex = MMI.addFrameInst( + unsigned Offset = MFI.getObjectOffset(FI); + unsigned CFIIndex = MF.addFrameInst( MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) @@ -537,8 +668,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { if ((Reg >= ARM::D0 && Reg <= ARM::D31) && (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) { unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); - unsigned Offset = MFI->getObjectOffset(FI); - unsigned CFIIndex = MMI.addFrameInst( + unsigned Offset = MFI.getObjectOffset(FI); + unsigned CFIIndex = MF.addFrameInst( MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) @@ -551,11 +682,11 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // throughout the process. If we have a frame pointer, it takes over the job // half-way through, so only the first few .cfi_def_cfa_offset instructions // actually get emitted. - DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP); + DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP); if (STI.isTargetELF() && hasFP(MF)) - MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - - AFI->getFramePtrSpillOffset()); + MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() - + AFI->getFramePtrSpillOffset()); AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); @@ -567,29 +698,27 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // If aligned NEON registers were spilled, the stack has already been // realigned. 
if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) { - unsigned MaxAlign = MFI->getMaxAlignment(); - assert (!AFI->isThumb1OnlyFunction()); + unsigned MaxAlign = MFI.getMaxAlignment(); + assert(!AFI->isThumb1OnlyFunction()); if (!AFI->isThumbFunction()) { - // Emit bic sp, sp, MaxAlign - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, - TII.get(ARM::BICri), ARM::SP) - .addReg(ARM::SP, RegState::Kill) - .addImm(MaxAlign-1))); + emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign, + false); } else { - // We cannot use sp as source/dest register here, thus we're emitting the - // following sequence: + // We cannot use sp as source/dest register here, thus we're using r4 to + // perform the calculations. We're emitting the following sequence: // mov r4, sp - // bic r4, r4, MaxAlign + // -- use emitAligningInstructions to produce best sequence to zero + // -- out lower bits in r4 // mov sp, r4 // FIXME: It will be better just to find spare register here. - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) - .addReg(ARM::SP, RegState::Kill)); - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, - TII.get(ARM::t2BICri), ARM::R4) - .addReg(ARM::R4, RegState::Kill) - .addImm(MaxAlign-1))); - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) - .addReg(ARM::R4, RegState::Kill)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) + .addReg(ARM::SP, RegState::Kill) + .add(predOps(ARMCC::AL)); + emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign, + false); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) + .addReg(ARM::R4, RegState::Kill) + .add(predOps(ARMCC::AL)); } AFI->setShouldRestoreSPFromFP(true); @@ -602,78 +731,26 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // FIXME: Clarify FrameSetup flags here. 
if (RegInfo->hasBasePointer(MF)) { if (isARM) - BuildMI(MBB, MBBI, dl, - TII.get(ARM::MOVr), RegInfo->getBaseRegister()) - .addReg(ARM::SP) - .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister()) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); else - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), - RegInfo->getBaseRegister()) - .addReg(ARM::SP)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister()) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)); } // If the frame has variable sized objects then the epilogue must restore // the sp from fp. We can assume there's an FP here since hasFP already // checks for hasVarSizedObjects. - if (MFI->hasVarSizedObjects()) + if (MFI.hasVarSizedObjects()) AFI->setShouldRestoreSPFromFP(true); } -// Resolve TCReturn pseudo-instruction -void ARMFrameLowering::fixTCReturn(MachineFunction &MF, - MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - assert(MBBI->isReturn() && "Can only insert epilog into returning blocks"); - unsigned RetOpcode = MBBI->getOpcode(); - DebugLoc dl = MBBI->getDebugLoc(); - const ARMBaseInstrInfo &TII = - *static_cast(MF.getSubtarget().getInstrInfo()); - - if (!(RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri)) - return; - - // Tail call return: adjust the stack pointer and jump to callee. - MBBI = MBB.getLastNonDebugInstr(); - MachineOperand &JumpTarget = MBBI->getOperand(0); - - // Jump to label or value in register. - if (RetOpcode == ARM::TCRETURNdi) { - unsigned TCOpcode = STI.isThumb() ? - (STI.isTargetMachO() ? 
ARM::tTAILJMPd : ARM::tTAILJMPdND) : - ARM::TAILJMPd; - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); - if (JumpTarget.isGlobal()) - MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), - JumpTarget.getTargetFlags()); - else { - assert(JumpTarget.isSymbol()); - MIB.addExternalSymbol(JumpTarget.getSymbolName(), - JumpTarget.getTargetFlags()); - } - - // Add the default predicate in Thumb mode. - if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0); - } else if (RetOpcode == ARM::TCRETURNri) { - BuildMI(MBB, MBBI, dl, - TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)). - addReg(JumpTarget.getReg(), RegState::Kill); - } - - MachineInstr *NewMI = std::prev(MBBI); - for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) - NewMI->addOperand(MBBI->getOperand(i)); - - // Delete the pseudo instruction TCRETURN. - MBB.erase(MBBI); - MBBI = NewMI; -} - void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - assert(MBBI->isReturn() && "Can only insert epilog into returning blocks"); - DebugLoc dl = MBBI->getDebugLoc(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); const ARMBaseInstrInfo &TII = @@ -682,20 +759,18 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, "This emitEpilogue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned Align = MF.getTarget() - .getSubtargetImpl() - ->getFrameLowering() - ->getStackAlignment(); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); - int NumBytes = (int)MFI->getStackSize(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); + int NumBytes = (int)MFI.getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); // All calls are tail calls in GHC calling conv, and functions have no // 
prologue/epilogue. - if (MF.getFunction()->getCallingConv() == CallingConv::GHC) { - fixTCReturn(MF, MBB); + if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; - } + + // First put ourselves on the first (from top) terminator instructions. + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); if (!AFI->hasStackFrame()) { if (NumBytes - ArgRegsSaveSize != 0) @@ -706,8 +781,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, if (MBBI != MBB.begin()) { do { --MBBI; - } while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs)); - if (!isCSRestore(MBBI, TII, CSRegs)) + } while (MBBI != MBB.begin() && isCSRestore(*MBBI, TII, CSRegs)); + if (!isCSRestore(*MBBI, TII, CSRegs)) ++MBBI; } @@ -734,34 +809,36 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, // This is bad, if an interrupt is taken after the mov, sp is in an // inconsistent state. // Use the first callee-saved register as a scratch register. - assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && + assert(!MFI.getPristineRegs(MF).test(ARM::R4) && "No scratch register to restore SP from FP!"); emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, ARMCC::AL, 0, TII); - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), - ARM::SP) - .addReg(ARM::R4)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) + .addReg(ARM::R4) + .add(predOps(ARMCC::AL)); } } else { // Thumb2 or ARM. 
if (isARM) BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP) - .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + .addReg(FramePtr) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); else - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), - ARM::SP) - .addReg(FramePtr)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) + .addReg(FramePtr) + .add(predOps(ARMCC::AL)); } } else if (NumBytes && - !tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes)) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); + !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes)) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); // Increment past our save areas. - if (AFI->getDPRCalleeSavedAreaSize()) { + if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) { MBBI++; // Since vpop register list cannot have gaps, there may be multiple vpop // instructions in the epilogue. - while (MBBI->getOpcode() == ARM::VLDMDIA_UPD) + while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD) MBBI++; } if (AFI->getDPRCalleeSavedGapSize()) { @@ -774,8 +851,6 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; } - fixTCReturn(MF, MBB); - if (ArgRegsSaveSize) emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize); } @@ -794,13 +869,13 @@ int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg, int SPAdj) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); const ARMBaseRegisterInfo *RegInfo = static_cast( MF.getSubtarget().getRegisterInfo()); const ARMFunctionInfo *AFI = MF.getInfo(); - int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); + int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize(); int FPOffset = Offset - AFI->getFramePtrSpillOffset(); - bool isFixed = MFI->isFixedObjectIndex(FI); + bool isFixed = MFI.isFixedObjectIndex(FI); FrameReg = ARM::SP; Offset += SPAdj; @@ -812,7 
+887,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, // When dynamically realigning the stack, use the frame pointer for // parameters, and the stack/base pointer for locals. if (RegInfo->needsStackRealignment(MF)) { - assert (hasFP(MF) && "dynamic stack realignment without a FP!"); + assert(hasFP(MF) && "dynamic stack realignment without a FP!"); if (isFixed) { FrameReg = RegInfo->getFrameRegister(MF); Offset = FPOffset; @@ -866,12 +941,6 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, return Offset; } -int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { - unsigned FrameReg; - return getFrameIndexReference(MF, FI, FrameReg); -} - void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, @@ -882,67 +951,71 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); - SmallVector, 4> Regs; + typedef std::pair RegAndKill; + SmallVector Regs; unsigned i = CSI.size(); while (i != 0) { unsigned LastReg = 0; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.isTargetDarwin())) continue; + if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue; // D-registers in the aligned area DPRCS2 are NOT spilled here. if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) continue; - // Add the callee-saved register as live-in unless it's LR and - // @llvm.returnaddress is called. If LR is returned for - // @llvm.returnaddress then it's already added to the function and - // entry block live-in sets. 
- bool isKill = true; - if (Reg == ARM::LR) { - if (MF.getFrameInfo()->isReturnAddressTaken() && - MF.getRegInfo().isLiveIn(Reg)) - isKill = false; - } - - if (isKill) + const MachineRegisterInfo &MRI = MF.getRegInfo(); + bool isLiveIn = MRI.isLiveIn(Reg); + if (!isLiveIn && !MRI.isReserved(Reg)) MBB.addLiveIn(Reg); - // If NoGap is true, push consecutive registers and then leave the rest // for other instructions. e.g. // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11} if (NoGap && LastReg && LastReg != Reg-1) break; LastReg = Reg; - Regs.push_back(std::make_pair(Reg, isKill)); + // Do not set a kill flag on values that are also marked as live-in. This + // happens with the @llvm.returnaddress intrinsic and with arguments + // passed in callee saved registers. + // Omitting the kill flags is conservatively correct even if the live-in + // is not used after all. + Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn)); } if (Regs.empty()) continue; + + std::sort(Regs.begin(), Regs.end(), [&](const RegAndKill &LHS, + const RegAndKill &RHS) { + return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first); + }); + if (Regs.size() > 1 || StrOpc== 0) { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP) - .addReg(ARM::SP).setMIFlags(MIFlags)); + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP) + .addReg(ARM::SP) + .setMIFlags(MIFlags) + .add(predOps(ARMCC::AL)); for (unsigned i = 0, e = Regs.size(); i < e; ++i) MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second)); } else if (Regs.size() == 1) { - MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc), - ARM::SP) - .addReg(Regs[0].first, getKillRegState(Regs[0].second)) - .addReg(ARM::SP).setMIFlags(MIFlags) - .addImm(-4); - AddDefaultPred(MIB); + BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP) + .addReg(Regs[0].first, getKillRegState(Regs[0].second)) + .addReg(ARM::SP) + .setMIFlags(MIFlags) + .addImm(-4) +
.add(predOps(ARMCC::AL)); } Regs.clear(); // Put any subsequent vpush instructions before this one: they will refer to // higher register numbers so need to be pushed first in order to preserve // monotonicity. - --MI; + if (MI != MBB.begin()) + --MI; } } @@ -955,13 +1028,22 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, unsigned NumAlignedDPRCS2Regs) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); ARMFunctionInfo *AFI = MF.getInfo(); - DebugLoc DL = MI->getDebugLoc(); - unsigned RetOpcode = MI->getOpcode(); - bool isTailCall = (RetOpcode == ARM::TCRETURNdi || - RetOpcode == ARM::TCRETURNri); - bool isInterrupt = - RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR; + DebugLoc DL; + bool isTailCall = false; + bool isInterrupt = false; + bool isTrap = false; + if (MBB.end() != MI) { + DL = MI->getDebugLoc(); + unsigned RetOpcode = MI->getOpcode(); + isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri); + isInterrupt = + RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR; + isTrap = + RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl || + RetOpcode == ARM::tTRAP; + } SmallVector Regs; unsigned i = CSI.size(); @@ -970,18 +1052,21 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, bool DeleteRet = false; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.isTargetDarwin())) continue; + if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue; // The aligned reloads from area DPRCS2 are not inserted here. if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) continue; if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt && - STI.hasV5TOps()) { - Reg = ARM::PC; - LdmOpc = AFI->isThumbFunction() ? 
ARM::t2LDMIA_RET : ARM::LDMIA_RET; + !isTrap && STI.hasV5TOps()) { + if (MBB.succ_empty()) { + Reg = ARM::PC; + DeleteRet = true; + LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; + } else + LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD; // Fold the return instruction into the LDM. - DeleteRet = true; } // If NoGap is true, pop consecutive registers and then leave the rest @@ -996,14 +1081,19 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, if (Regs.empty()) continue; + + std::sort(Regs.begin(), Regs.end(), [&](unsigned LHS, unsigned RHS) { + return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS); + }); + if (Regs.size() > 1 || LdrOpc == 0) { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP) - .addReg(ARM::SP)); + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)); for (unsigned i = 0, e = Regs.size(); i < e; ++i) MIB.addReg(Regs[i], getDefRegState(true)); - if (DeleteRet) { - MIB.copyImplicitOps(&*MI); + if (DeleteRet && MI != MBB.end()) { + MIB.copyImplicitOps(*MI); MI->eraseFromParent(); } MI = MIB; @@ -1023,13 +1113,14 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift)); } else MIB.addImm(4); - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); } Regs.clear(); // Put any subsequent vpop instructions after this one: they will refer to // higher register numbers so need to be popped afterwards. - ++MI; + if (MI != MBB.end()) + ++MI; } } @@ -1043,16 +1134,16 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) { MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo(); - DebugLoc DL = MI->getDebugLoc(); + DebugLoc DL = MI != MBB.end() ? 
MI->getDebugLoc() : DebugLoc(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); // Mark the D-register spill slots as properly aligned. Since MFI computes // stack slot layout backwards, this can actually mean that the d-reg stack // slot offsets can be wrong. The offset for d8 will always be correct. for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned DNum = CSI[i].getReg() - ARM::D8; - if (DNum >= 8) + if (DNum > NumAlignedDPRCS2Regs - 1) continue; int FI = CSI[i].getFrameIdx(); // The even-numbered registers will be 16-byte aligned, the odd-numbered @@ -1083,16 +1174,19 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, // sub r4, sp, #numregs * 8 // The immediate is <= 64, so it doesn't need any special encoding. unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri; - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) - .addReg(ARM::SP) - .addImm(8 * NumAlignedDPRCS2Regs))); - - // bic r4, r4, #align-1 - Opc = isThumb ? ARM::t2BICri : ARM::BICri; - unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment(); - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) - .addReg(ARM::R4, RegState::Kill) - .addImm(MaxAlign - 1))); + BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) + .addReg(ARM::SP) + .addImm(8 * NumAlignedDPRCS2Regs) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + + unsigned MaxAlign = MF.getFrameInfo().getMaxAlignment(); + // We must set parameter MustBeSingleInstruction to true, since + // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform + // stack alignment. Luckily, this can always be done since all ARM + // architecture versions that support Neon also support the BFC + // instruction. 
+ emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true); // mov sp, r4 // The stack pointer must be adjusted before spilling anything, otherwise @@ -1100,10 +1194,10 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, // Leave r4 live, it is used below. Opc = isThumb ? ARM::tMOVr : ARM::MOVr; MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP) - .addReg(ARM::R4); - MIB = AddDefaultPred(MIB); + .addReg(ARM::R4) + .add(predOps(ARMCC::AL)); if (!isThumb) - AddDefaultCC(MIB); + MIB.add(condCodeOp()); // Now spill NumAlignedDPRCS2Regs registers starting from d8. // r4 holds the stack slot address. @@ -1115,11 +1209,12 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass); MBB.addLiveIn(SupReg); - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), - ARM::R4) - .addReg(ARM::R4, RegState::Kill).addImm(16) - .addReg(NextReg) - .addReg(SupReg, RegState::ImplicitKill)); + BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4) + .addReg(ARM::R4, RegState::Kill) + .addImm(16) + .addReg(NextReg) + .addReg(SupReg, RegState::ImplicitKill) + .add(predOps(ARMCC::AL)); NextReg += 4; NumAlignedDPRCS2Regs -= 4; } @@ -1133,9 +1228,12 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass); MBB.addLiveIn(SupReg); - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q)) - .addReg(ARM::R4).addImm(16).addReg(NextReg) - .addReg(SupReg, RegState::ImplicitKill)); + BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q)) + .addReg(ARM::R4) + .addImm(16) + .addReg(NextReg) + .addReg(SupReg, RegState::ImplicitKill) + .add(predOps(ARMCC::AL)); NextReg += 4; NumAlignedDPRCS2Regs -= 4; } @@ -1145,8 +1243,11 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, 
&ARM::QPRRegClass); MBB.addLiveIn(SupReg); - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64)) - .addReg(ARM::R4).addImm(16).addReg(SupReg)); + BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64)) + .addReg(ARM::R4) + .addImm(16) + .addReg(SupReg) + .add(predOps(ARMCC::AL)); NextReg += 2; NumAlignedDPRCS2Regs -= 2; } @@ -1155,9 +1256,11 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, if (NumAlignedDPRCS2Regs) { MBB.addLiveIn(NextReg); // vstr.64 uses addrmode5 which has an offset scale of 4. - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD)) - .addReg(NextReg) - .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2)); + BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD)) + .addReg(NextReg) + .addReg(ARM::R4) + .addImm((NextReg - R4BaseReg) * 2) + .add(predOps(ARMCC::AL)); } // The last spill instruction inserted should kill the scratch register r4. @@ -1202,7 +1305,7 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) { MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo(); - DebugLoc DL = MI->getDebugLoc(); + DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); // Find the frame index assigned to d8. @@ -1222,8 +1325,11 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1"); unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri; - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) - .addFrameIndex(D8SpillFI).addImm(0))); + BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) + .addFrameIndex(D8SpillFI) + .addImm(0) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); // Now restore NumAlignedDPRCS2Regs registers starting from d8. 
unsigned NextReg = ARM::D8; @@ -1232,10 +1338,12 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, if (NumAlignedDPRCS2Regs >= 6) { unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass); - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg) - .addReg(ARM::R4, RegState::Define) - .addReg(ARM::R4, RegState::Kill).addImm(16) - .addReg(SupReg, RegState::ImplicitDefine)); + BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg) + .addReg(ARM::R4, RegState::Define) + .addReg(ARM::R4, RegState::Kill) + .addImm(16) + .addReg(SupReg, RegState::ImplicitDefine) + .add(predOps(ARMCC::AL)); NextReg += 4; NumAlignedDPRCS2Regs -= 4; } @@ -1248,9 +1356,11 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, if (NumAlignedDPRCS2Regs >= 4) { unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass); - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg) - .addReg(ARM::R4).addImm(16) - .addReg(SupReg, RegState::ImplicitDefine)); + BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg) + .addReg(ARM::R4) + .addImm(16) + .addReg(SupReg, RegState::ImplicitDefine) + .add(predOps(ARMCC::AL)); NextReg += 4; NumAlignedDPRCS2Regs -= 4; } @@ -1259,16 +1369,20 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, if (NumAlignedDPRCS2Regs >= 2) { unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QPRRegClass); - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg) - .addReg(ARM::R4).addImm(16)); + BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg) + .addReg(ARM::R4) + .addImm(16) + .add(predOps(ARMCC::AL)); NextReg += 2; NumAlignedDPRCS2Regs -= 2; } // Finally, use a vanilla vldr.64 for the remaining odd register. 
if (NumAlignedDPRCS2Regs) - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg) - .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg))); + BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg) + .addReg(ARM::R4) + .addImm(2 * (NextReg - R4BaseReg)) + .add(predOps(ARMCC::AL)); // Last store kills r4. std::prev(MI)->addRegisterKilled(ARM::R4, TRI); @@ -1341,7 +1455,7 @@ static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, unsigned FnSize = 0; for (auto &MBB : MF) { for (auto &MI : MBB) - FnSize += TII.GetInstSizeInBytes(&MI); + FnSize += TII.getInstSizeInBytes(MI); } return FnSize; } @@ -1403,31 +1517,27 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, // callee-saved vector registers after realigning the stack. The vst1 and vld1 // instructions take alignment hints that can improve performance. // -static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { +static void +checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) { MF.getInfo()->setNumAlignedDPRCS2Regs(0); if (!SpillAlignedNEONRegs) return; // Naked functions don't spill callee-saved registers. - if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::Naked)) + if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) return; // We are planning to use NEON instructions vst1 / vld1. - if (!MF.getTarget().getSubtarget().hasNEON()) + if (!static_cast(MF.getSubtarget()).hasNEON()) return; // Don't bother if the default stack alignment is sufficiently high. - if (MF.getTarget() - .getSubtargetImpl() - ->getFrameLowering() - ->getStackAlignment() >= 8) + if (MF.getSubtarget().getFrameLowering()->getStackAlignment() >= 8) return; // Aligned spills require stack realignment. 
- const ARMBaseRegisterInfo *RegInfo = static_cast( - MF.getSubtarget().getRegisterInfo()); - if (!RegInfo->canRealignStack(MF)) + if (!static_cast( + MF.getSubtarget().getRegisterInfo())->canRealignStack(MF)) return; // We always spill contiguous d-registers starting from d8. Count how many @@ -1435,10 +1545,9 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { // callee-saved registers in order, but it can happen that there are holes in // the range. Registers above the hole will be spilled to the standard DPRCS // area. - MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned NumSpills = 0; for (; NumSpills < 8; ++NumSpills) - if (!MRI.isPhysRegUsed(ARM::D8 + NumSpills)) + if (!SavedRegs.test(ARM::D8 + NumSpills)) break; // Don't do this for just one d-register. It's not worth it. @@ -1449,12 +1558,13 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { MF.getInfo()->setNumAlignedDPRCS2Regs(NumSpills); // A scratch register is required for the vst1 / vld1 instructions. - MF.getRegInfo().setPhysRegUsed(ARM::R4); + SavedRegs.set(ARM::R4); } -void -ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { +void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); // This tells PEI to spill the FP as if it is any other callee-save register // to take advantage the eliminateFrameIndex machinery. 
This also ensures it // is spilled in the order specified by getCalleeSavedRegs() to make it easier @@ -1463,6 +1573,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, bool CS1Spilled = false; bool LRSpilled = false; unsigned NumGPRSpills = 0; + unsigned NumFPRSpills = 0; SmallVector UnspilledCS1GPRs; SmallVector UnspilledCS2GPRs; const ARMBaseRegisterInfo *RegInfo = static_cast( @@ -1470,8 +1581,10 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, const ARMBaseInstrInfo &TII = *static_cast(MF.getSubtarget().getInstrInfo()); ARMFunctionInfo *AFI = MF.getInfo(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + (void)TRI; // Silence unused warning in non-assert builds. unsigned FramePtr = RegInfo->getFrameRegister(MF); // Spill R4 if Thumb2 function requires stack realignment - it will be used as @@ -1480,13 +1593,13 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // instruction. // FIXME: It will be better just to find spare register here. if (AFI->isThumb2Function() && - (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF))) - MRI.setPhysRegUsed(ARM::R4); + (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF))) + SavedRegs.set(ARM::R4); if (AFI->isThumb1OnlyFunction()) { // Spill LR if Thumb1 function uses variable length argument lists. if (AFI->getArgRegsSaveSize() > 0) - MRI.setPhysRegUsed(ARM::LR); + SavedRegs.set(ARM::LR); // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know // for sure what the stack size will be, but for this, an estimate is good @@ -1494,36 +1607,45 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // we've used all the registers and so R4 is already used, so not marking // it here will be OK. 
// FIXME: It will be better just to find spare register here. - unsigned StackSize = MFI->estimateStackSize(MF); - if (MFI->hasVarSizedObjects() || StackSize > 508) - MRI.setPhysRegUsed(ARM::R4); + unsigned StackSize = MFI.estimateStackSize(MF); + if (MFI.hasVarSizedObjects() || StackSize > 508) + SavedRegs.set(ARM::R4); } // See if we can spill vector registers to aligned stack. - checkNumAlignedDPRCS2Regs(MF); + checkNumAlignedDPRCS2Regs(MF, SavedRegs); // Spill the BasePtr if it's used. if (RegInfo->hasBasePointer(MF)) - MRI.setPhysRegUsed(RegInfo->getBaseRegister()); + SavedRegs.set(RegInfo->getBaseRegister()); // Don't spill FP if the frame can be eliminated. This is determined - // by scanning the callee-save registers to see if any is used. + // by scanning the callee-save registers to see if any is modified. const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; bool Spilled = false; - if (MRI.isPhysRegUsed(Reg)) { + if (SavedRegs.test(Reg)) { Spilled = true; CanEliminateFrame = false; } - if (!ARM::GPRRegClass.contains(Reg)) + if (!ARM::GPRRegClass.contains(Reg)) { + if (Spilled) { + if (ARM::SPRRegClass.contains(Reg)) + NumFPRSpills++; + else if (ARM::DPRRegClass.contains(Reg)) + NumFPRSpills += 2; + else if (ARM::QPRRegClass.contains(Reg)) + NumFPRSpills += 4; + } continue; + } if (Spilled) { NumGPRSpills++; - if (!STI.isTargetDarwin()) { + if (!STI.splitFramePushPop(MF)) { if (Reg == ARM::LR) LRSpilled = true; CS1Spilled = true; @@ -1534,7 +1656,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, switch (Reg) { case ARM::LR: LRSpilled = true; - // Fallthrough + LLVM_FALLTHROUGH; case ARM::R0: case ARM::R1: case ARM::R2: case ARM::R3: case ARM::R4: case ARM::R5: @@ -1545,7 +1667,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, break; } } else { - if (!STI.isTargetDarwin()) { + if (!STI.splitFramePushPop(MF)) { 
UnspilledCS1GPRs.push_back(Reg); continue; } @@ -1591,26 +1713,176 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // FIXME: We could add logic to be more precise about negative offsets // and which instructions will need a scratch register for them. Is it // worth the effort and added fragility? - bool BigStack = - (RS && - (MFI->estimateStackSize(MF) + - ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= - estimateRSStackSizeLimit(MF, this))) - || MFI->hasVarSizedObjects() - || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF)); + unsigned EstimatedStackSize = + MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills); + + // Determine biggest (positive) SP offset in MachineFrameInfo. + int MaxFixedOffset = 0; + for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) { + int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I); + MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset); + } + bool HasFP = hasFP(MF); + if (HasFP) { + if (AFI->hasStackFrame()) + EstimatedStackSize += 4; + } else { + // If FP is not used, SP will be used to access arguments, so count the + // size of arguments into the estimation. + EstimatedStackSize += MaxFixedOffset; + } + EstimatedStackSize += 16; // For possible paddings. + + unsigned EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this); + int MaxFPOffset = getMaxFPOffset(*MF.getFunction(), *AFI); + bool BigFrameOffsets = EstimatedStackSize >= EstimatedRSStackSizeLimit || + MFI.hasVarSizedObjects() || + (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)) || + // For large argument stacks fp relative addresses may overflow.
+ (HasFP && (MaxFixedOffset - MaxFPOffset) >= (int)EstimatedRSStackSizeLimit); bool ExtraCSSpill = false; - if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { + if (BigFrameOffsets || + !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { AFI->setHasStackFrame(true); + if (HasFP) { + SavedRegs.set(FramePtr); + // If the frame pointer is required by the ABI, also spill LR so that we + // emit a complete frame record. + if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) { + SavedRegs.set(ARM::LR); + LRSpilled = true; + NumGPRSpills++; + auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR); + if (LRPos != UnspilledCS1GPRs.end()) + UnspilledCS1GPRs.erase(LRPos); + } + auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr); + if (FPPos != UnspilledCS1GPRs.end()) + UnspilledCS1GPRs.erase(FPPos); + NumGPRSpills++; + if (FramePtr == ARM::R7) + CS1Spilled = true; + } + + if (AFI->isThumb1OnlyFunction()) { + // For Thumb1-only targets, we need some low registers when we save and + // restore the high registers (which aren't allocatable, but could be + // used by inline assembly) because the push/pop instructions can not + // access high registers. If necessary, we might need to push more low + // registers to ensure that there is at least one free that can be used + // for the saving & restoring, and preferably we should ensure that as + // many as are needed are available so that fewer push/pop instructions + // are required. + + // Low registers which are not currently pushed, but could be (r4-r7). + SmallVector AvailableRegs; + + // Unused argument registers (r0-r3) can be clobbered in the prologue for + // free. 
+ int EntryRegDeficit = 0; + for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) { + if (!MF.getRegInfo().isLiveIn(Reg)) { + --EntryRegDeficit; + DEBUG(dbgs() << PrintReg(Reg, TRI) + << " is unused argument register, EntryRegDeficit = " + << EntryRegDeficit << "\n"); + } + } + + // Unused return registers can be clobbered in the epilogue for free. + int ExitRegDeficit = AFI->getReturnRegsCount() - 4; + DEBUG(dbgs() << AFI->getReturnRegsCount() + << " return regs used, ExitRegDeficit = " << ExitRegDeficit + << "\n"); + + int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit); + DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n"); + + // r4-r6 can be used in the prologue if they are pushed by the first push + // instruction. + for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) { + if (SavedRegs.test(Reg)) { + --RegDeficit; + DEBUG(dbgs() << PrintReg(Reg, TRI) + << " is saved low register, RegDeficit = " << RegDeficit + << "\n"); + } else { + AvailableRegs.push_back(Reg); + DEBUG(dbgs() + << PrintReg(Reg, TRI) + << " is non-saved low register, adding to AvailableRegs\n"); + } + } + + // r7 can be used if it is not being used as the frame pointer. + if (!HasFP) { + if (SavedRegs.test(ARM::R7)) { + --RegDeficit; + DEBUG(dbgs() << "%R7 is saved low register, RegDeficit = " + << RegDeficit << "\n"); + } else { + AvailableRegs.push_back(ARM::R7); + DEBUG(dbgs() + << "%R7 is non-saved low register, adding to AvailableRegs\n"); + } + } + + // Each of r8-r11 needs to be copied to a low register, then pushed. + for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) { + if (SavedRegs.test(Reg)) { + ++RegDeficit; + DEBUG(dbgs() << PrintReg(Reg, TRI) + << " is saved high register, RegDeficit = " << RegDeficit + << "\n"); + } + } + + // LR can only be used by PUSH, not POP, and can't be used at all if the + // llvm.returnaddress intrinsic is used. This is only worth doing if we + // are more limited at function entry than exit. 
+ if ((EntryRegDeficit > ExitRegDeficit) && + !(MF.getRegInfo().isLiveIn(ARM::LR) && + MF.getFrameInfo().isReturnAddressTaken())) { + if (SavedRegs.test(ARM::LR)) { + --RegDeficit; + DEBUG(dbgs() << "%LR is saved register, RegDeficit = " << RegDeficit + << "\n"); + } else { + AvailableRegs.push_back(ARM::LR); + DEBUG(dbgs() << "%LR is not saved, adding to AvailableRegs\n"); + } + } + + // If there are more high registers that need pushing than low registers + // available, push some more low registers so that we can use fewer push + // instructions. This might not reduce RegDeficit all the way to zero, + // because we can only guarantee that r4-r6 are available, but r8-r11 may + // need saving. + DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n"); + for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) { + unsigned Reg = AvailableRegs.pop_back_val(); + DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI) + << " to make up reg deficit\n"); + SavedRegs.set(Reg); + NumGPRSpills++; + CS1Spilled = true; + ExtraCSSpill = true; + UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg)); + if (Reg == ARM::LR) + LRSpilled = true; + } + DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit << "\n"); + } + // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. // Spill LR as well so we can fold BX_RET to the registers restore (LDM). 
if (!LRSpilled && CS1Spilled) { - MRI.setPhysRegUsed(ARM::LR); + SavedRegs.set(ARM::LR); NumGPRSpills++; SmallVectorImpl::iterator LRPos; - LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), - (unsigned)ARM::LR); + LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR); if (LRPos != UnspilledCS1GPRs.end()) UnspilledCS1GPRs.erase(LRPos); @@ -1618,27 +1890,23 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, ExtraCSSpill = true; } - if (hasFP(MF)) { - MRI.setPhysRegUsed(FramePtr); - auto FPPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), - FramePtr); - if (FPPos != UnspilledCS1GPRs.end()) - UnspilledCS1GPRs.erase(FPPos); - NumGPRSpills++; - } - // If stack and double are 8-byte aligned and we are spilling an odd number // of GPRs, spill one extra callee save GPR so we won't have to pad between // the integer and double callee save areas. + DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n"); unsigned TargetAlign = getStackAlignment(); if (TargetAlign >= 8 && (NumGPRSpills & 1)) { if (CS1Spilled && !UnspilledCS1GPRs.empty()) { for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) { unsigned Reg = UnspilledCS1GPRs[i]; - // Don't spill high register if the function is thumb1 - if (!AFI->isThumb1OnlyFunction() || + // Don't spill high register if the function is thumb. 
In the case of + // Windows on ARM, accept R11 (frame pointer) + if (!AFI->isThumbFunction() || + (STI.isTargetWindows() && Reg == ARM::R11) || isARMLowRegister(Reg) || Reg == ARM::LR) { - MRI.setPhysRegUsed(Reg); + SavedRegs.set(Reg); + DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI) + << " to make up alignment\n"); if (!MRI.isReserved(Reg)) ExtraCSSpill = true; break; @@ -1646,7 +1914,9 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) { unsigned Reg = UnspilledCS2GPRs.front(); - MRI.setPhysRegUsed(Reg); + SavedRegs.set(Reg); + DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI) + << " to make up alignment\n"); if (!MRI.isReserved(Reg)) ExtraCSSpill = true; } @@ -1657,7 +1927,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // callee-saved register or reserve a special spill slot to facilitate // register scavenging. Thumb1 needs a spill slot for stack pointer // adjustments also, even when the frame itself is small. - if (BigStack && !ExtraCSSpill) { + if (BigFrameOffsets && !ExtraCSSpill) { // If any non-reserved CS register isn't spilled, just spill one or two // extra. That should take care of it! unsigned NumExtras = TargetAlign / 4; @@ -1685,44 +1955,43 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } if (Extras.size() && NumExtras == 0) { for (unsigned i = 0, e = Extras.size(); i != e; ++i) { - MRI.setPhysRegUsed(Extras[i]); + SavedRegs.set(Extras[i]); } } else if (!AFI->isThumb1OnlyFunction()) { // note: Thumb1 functions spill to R12, not the stack. Reserve a slot // closest to SP or frame pointer. 
- const TargetRegisterClass *RC = &ARM::GPRRegClass; - RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); + assert(RS && "Register scavenging not provided"); + const TargetRegisterClass &RC = ARM::GPRRegClass; + unsigned Size = TRI->getSpillSize(RC); + unsigned Align = TRI->getSpillAlignment(RC); + RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); } } } if (ForceLRSpill) { - MRI.setPhysRegUsed(ARM::LR); + SavedRegs.set(ARM::LR); AFI->setLRIsSpilledForFarJump(true); } } - -void ARMFrameLowering:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { +MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { const ARMBaseInstrInfo &TII = *static_cast(MF.getSubtarget().getInstrInfo()); if (!hasReservedCallFrame(MF)) { // If we have alloca, convert as follows: // ADJCALLSTACKDOWN -> sub, sp, sp, amount // ADJCALLSTACKUP -> add, sp, sp, amount - MachineInstr *Old = I; - DebugLoc dl = Old->getDebugLoc(); - unsigned Amount = Old->getOperand(0).getImm(); + MachineInstr &Old = *I; + DebugLoc dl = Old.getDebugLoc(); + unsigned Amount = TII.getFrameSize(Old); if (Amount != 0) { // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next // alignment boundary. - unsigned Align = getStackAlignment(); - Amount = (Amount+Align-1)/Align*Align; + Amount = alignSPAdjust(Amount); ARMFunctionInfo *AFI = MF.getInfo(); assert(!AFI->isThumb1OnlyFunction() && @@ -1730,25 +1999,23 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, bool isARM = !AFI->isThumbFunction(); // Replace the pseudo instruction with a new instruction... - unsigned Opc = Old->getOpcode(); - int PIdx = Old->findFirstPredOperandIdx(); - ARMCC::CondCodes Pred = (PIdx == -1) - ? 
ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm(); + unsigned Opc = Old.getOpcode(); + int PIdx = Old.findFirstPredOperandIdx(); + ARMCC::CondCodes Pred = + (PIdx == -1) ? ARMCC::AL + : (ARMCC::CondCodes)Old.getOperand(PIdx).getImm(); + unsigned PredReg = TII.getFramePred(Old); if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { - // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. - unsigned PredReg = Old->getOperand(2).getReg(); emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags, Pred, PredReg); } else { - // Note: PredReg is operand 3 for ADJCALLSTACKUP. - unsigned PredReg = Old->getOperand(3).getReg(); assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags, Pred, PredReg); } } } - MBB.erase(I); + return MBB.erase(I); } /// Get the minimum constant for ARM that is greater than or equal to the @@ -1808,10 +2075,11 @@ static const uint64_t kSplitStackAvailable = 256; // ARM can be found at [1]. 
// // [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S -void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { +void ARMFrameLowering::adjustForSegmentedStacks( + MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { unsigned Opcode; unsigned CFIIndex; - const ARMSubtarget *ST = &MF.getTarget().getSubtarget(); + const ARMSubtarget *ST = &MF.getSubtarget(); bool Thumb = ST->isThumb(); // Sadly, this currently doesn't support varargs, platforms other than @@ -1821,8 +2089,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { if (!ST->isTargetAndroid() && !ST->isTargetLinux()) report_fatal_error("Segmented stacks not supported on this platform."); - MachineBasicBlock &prologueMBB = MF.front(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); MCContext &Context = MMI.getContext(); const MCRegisterInfo *MRI = Context.getRegisterInfo(); @@ -1831,7 +2098,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { ARMFunctionInfo *ARMFI = MF.getInfo(); DebugLoc DL; - uint64_t StackSize = MFI->getStackSize(); + uint64_t StackSize = MFI.getStackSize(); // Do not generate a prologue for functions with a stack of size zero if (StackSize == 0) @@ -1849,21 +2116,48 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock(); MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock(); - for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(), - e = prologueMBB.livein_end(); - i != e; ++i) { - AllocMBB->addLiveIn(*i); - GetMBB->addLiveIn(*i); - McrMBB->addLiveIn(*i); - PrevStackMBB->addLiveIn(*i); - PostStackMBB->addLiveIn(*i); + // Grab everything that reaches PrologueMBB to update there liveness as well. 
+ SmallPtrSet BeforePrologueRegion; + SmallVector WalkList; + WalkList.push_back(&PrologueMBB); + + do { + MachineBasicBlock *CurMBB = WalkList.pop_back_val(); + for (MachineBasicBlock *PredBB : CurMBB->predecessors()) { + if (BeforePrologueRegion.insert(PredBB).second) + WalkList.push_back(PredBB); + } + } while (!WalkList.empty()); + + // The order in that list is important. + // The blocks will all be inserted before PrologueMBB using that order. + // Therefore the block that should appear first in the CFG should appear + // first in the list. + MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB, + PostStackMBB}; + + for (MachineBasicBlock *B : AddedBlocks) + BeforePrologueRegion.insert(B); + + for (const auto &LI : PrologueMBB.liveins()) { + for (MachineBasicBlock *PredBB : BeforePrologueRegion) + PredBB->addLiveIn(LI); } - MF.push_front(PostStackMBB); - MF.push_front(AllocMBB); - MF.push_front(GetMBB); - MF.push_front(McrMBB); - MF.push_front(PrevStackMBB); + // Remove the newly added blocks from the list, since we know + // we do not have to do the following updates for them. + for (MachineBasicBlock *B : AddedBlocks) { + BeforePrologueRegion.erase(B); + MF.insert(PrologueMBB.getIterator(), B); + } + + for (MachineBasicBlock *MBB : BeforePrologueRegion) { + // Make sure the LiveIns are still sorted and unique. + MBB->sortUniqueLiveIns(); + // Replace the edges to PrologueMBB by edges to the sequences + // we are about to add. + MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]); + } // The required stack size that is aligned to ARM constant criterion. 
AlignedStackSize = alignToARMConstant(StackSize); @@ -1880,46 +2174,59 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { // SR1: Scratch Register #1 // push {SR0, SR1} if (Thumb) { - AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))) - .addReg(ScratchReg0).addReg(ScratchReg1); + BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH)) + .add(predOps(ARMCC::AL)) + .addReg(ScratchReg0) + .addReg(ScratchReg1); } else { - AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD)) - .addReg(ARM::SP, RegState::Define).addReg(ARM::SP)) - .addReg(ScratchReg0).addReg(ScratchReg1); + BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD)) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .addReg(ScratchReg0) + .addReg(ScratchReg1); } // Emit the relevant DWARF information about the change in stack pointer as // well as where to find both r4 and r5 (the callee-save registers) CFIIndex = - MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8)); + MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8)); BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); - CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( + CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4)); BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); - CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( + CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8)); BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); // mov SR1, sp if (Thumb) { - AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1) - .addReg(ARM::SP)); + BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)); } else if (CompareStackPointer) { - 
AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1) - .addReg(ARM::SP)).addReg(0); + BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); } // sub SR1, sp, #StackSize if (!CompareStackPointer && Thumb) { - AddDefaultPred( - AddDefaultCC(BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)) - .addReg(ScratchReg1).addImm(AlignedStackSize)); + BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1) + .add(condCodeOp()) + .addReg(ScratchReg1) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)); } else if (!CompareStackPointer) { - AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1) - .addReg(ARM::SP).addImm(AlignedStackSize)).addReg(0); + BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1) + .addReg(ARM::SP) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); } if (Thumb && ST->isThumb1Only()) { @@ -1927,24 +2234,28 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create( MF.getFunction()->getContext(), "__STACK_LIMIT", PCLabelId, 0); MachineConstantPool *MCP = MF.getConstantPool(); - unsigned CPI = MCP->getConstantPoolIndex(NewCPV, MF.getAlignment()); + unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4); // ldr SR0, [pc, offset(STACK_LIMIT)] - AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0) - .addConstantPoolIndex(CPI)); + BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0) + .addConstantPoolIndex(CPI) + .add(predOps(ARMCC::AL)); // ldr SR0, [SR0] - AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0) - .addReg(ScratchReg0).addImm(0)); + BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0) + .addReg(ScratchReg0) + .addImm(0) + .add(predOps(ARMCC::AL)); } else { // Get TLS base address from the coprocessor // mrc p15, #0, SR0, c13, c0, #3 - AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0) - 
.addImm(15) - .addImm(0) - .addImm(13) - .addImm(0) - .addImm(3)); + BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0) + .addImm(15) + .addImm(0) + .addImm(13) + .addImm(0) + .addImm(3) + .add(predOps(ARMCC::AL)); // Use the last tls slot on android and a private field of the TCP on linux. assert(ST->isTargetAndroid() || ST->isTargetLinux()); @@ -1952,16 +2263,19 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { // Get the stack limit from the right offset // ldr SR0, [sr0, #4 * TlsOffset] - AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0) - .addReg(ScratchReg0).addImm(4 * TlsOffset)); + BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0) + .addReg(ScratchReg0) + .addImm(4 * TlsOffset) + .add(predOps(ARMCC::AL)); } // Compare stack limit with stack size requested. // cmp SR0, SR1 Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr; - AddDefaultPred(BuildMI(GetMBB, DL, TII.get(Opcode)) - .addReg(ScratchReg0) - .addReg(ScratchReg1)); + BuildMI(GetMBB, DL, TII.get(Opcode)) + .addReg(ScratchReg0) + .addReg(ScratchReg1) + .add(predOps(ARMCC::AL)); // This jump is taken if StackLimit < SP - stack required. Opcode = Thumb ? ARM::tBcc : ARM::Bcc; @@ -1977,49 +2291,58 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { // Pass first argument for the __morestack by Scratch Register #0. // The amount size of stack required if (Thumb) { - AddDefaultPred(AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), - ScratchReg0)).addImm(AlignedStackSize)); + BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0) + .add(condCodeOp()) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)); } else { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0) - .addImm(AlignedStackSize)).addReg(0); + BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); } // Pass second argument for the __morestack by Scratch Register #1. 
// The amount size of stack consumed to save function arguments. if (Thumb) { - AddDefaultPred( - AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)) - .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))); + BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1) + .add(condCodeOp()) + .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())) + .add(predOps(ARMCC::AL)); } else { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1) - .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))) - .addReg(0); + BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1) + .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); } // push {lr} - Save return address of this function. if (Thumb) { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))) + BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH)) + .add(predOps(ARMCC::AL)) .addReg(ARM::LR); } else { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD)) - .addReg(ARM::SP, RegState::Define) - .addReg(ARM::SP)) + BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD)) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) .addReg(ARM::LR); } // Emit the DWARF info about the change in stack as well as where to find the // previous link register CFIIndex = - MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12)); + MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12)); BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); - CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( + CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12)); BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); // Call __morestack(). 
if (Thumb) { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tBL))) + BuildMI(AllocMBB, DL, TII.get(ARM::tBL)) + .add(predOps(ARMCC::AL)) .addExternalSymbol("__morestack"); } else { BuildMI(AllocMBB, DL, TII.get(ARM::BL)) @@ -2029,22 +2352,26 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { // pop {lr} - Restore return address of this original function. if (Thumb) { if (ST->isThumb1Only()) { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))) - .addReg(ScratchReg0); - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR) - .addReg(ScratchReg0)); + BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)) + .add(predOps(ARMCC::AL)) + .addReg(ScratchReg0); + BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR) + .addReg(ScratchReg0) + .add(predOps(ARMCC::AL)); } else { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST)) - .addReg(ARM::LR, RegState::Define) - .addReg(ARM::SP, RegState::Define) - .addReg(ARM::SP) - .addImm(4)); + BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST)) + .addReg(ARM::LR, RegState::Define) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .addImm(4) + .add(predOps(ARMCC::AL)); } } else { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD)) - .addReg(ARM::SP, RegState::Define) - .addReg(ARM::SP)) - .addReg(ARM::LR); + BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD)) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .addReg(ARM::LR); } // Restore SR0 and SR1 in case of __morestack() was called. @@ -2052,58 +2379,62 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { // scratch registers from here. 
// pop {SR0, SR1} if (Thumb) { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))) - .addReg(ScratchReg0) - .addReg(ScratchReg1); + BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)) + .add(predOps(ARMCC::AL)) + .addReg(ScratchReg0) + .addReg(ScratchReg1); } else { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD)) - .addReg(ARM::SP, RegState::Define) - .addReg(ARM::SP)) - .addReg(ScratchReg0) - .addReg(ScratchReg1); + BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD)) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .addReg(ScratchReg0) + .addReg(ScratchReg1); } // Update the CFA offset now that we've popped - CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0)); + CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0)); BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); // bx lr - Return from this function. Opcode = Thumb ? ARM::tBX_RET : ARM::BX_RET; - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(Opcode))); + BuildMI(AllocMBB, DL, TII.get(Opcode)).add(predOps(ARMCC::AL)); // Restore SR0 and SR1 in case of __morestack() was not called. 
// pop {SR0, SR1} if (Thumb) { - AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))) - .addReg(ScratchReg0) - .addReg(ScratchReg1); + BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP)) + .add(predOps(ARMCC::AL)) + .addReg(ScratchReg0) + .addReg(ScratchReg1); } else { - AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD)) - .addReg(ARM::SP, RegState::Define) - .addReg(ARM::SP)) - .addReg(ScratchReg0) - .addReg(ScratchReg1); + BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD)) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .addReg(ScratchReg0) + .addReg(ScratchReg1); } // Update the CFA offset now that we've popped - CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0)); + CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0)); BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); // Tell debuggers that r4 and r5 are now the same as they were in the // previous function, that they're the "Same Value". - CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue( + CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue( nullptr, MRI->getDwarfRegNum(ScratchReg0, true))); BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); - CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue( + CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue( nullptr, MRI->getDwarfRegNum(ScratchReg1, true))); BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); // Organizing MBB lists - PostStackMBB->addSuccessor(&prologueMBB); + PostStackMBB->addSuccessor(&PrologueMBB); AllocMBB->addSuccessor(PostStackMBB); @@ -2114,7 +2445,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { PrevStackMBB->addSuccessor(McrMBB); -#ifdef XDEBUG +#ifdef EXPENSIVE_CHECKS MF.verify(); #endif }