return false;
}
+ /// Returns the original SrcReg unless it is the target of a copy-like
+ /// operation, in which case we chain backwards through all such operations
+ /// to the ultimate source register. If a physical register is encountered,
+ /// we stop the search.
+ virtual unsigned lookThruCopyLike(unsigned SrcReg,
+ const MachineRegisterInfo *MRI) const;
+
/// Return a null-terminated list of all of the callee-saved registers on
/// this target. The register should be in the order of desired callee-save
/// stack frame offset. The first register is closest to the incoming stack
cl::desc("MachineLICM should sink instructions into "
"loops to avoid register spills"),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+HoistConstStores("hoist-const-stores",
+ cl::desc("Hoist invariant stores"),
+ cl::init(true), cl::Hidden);
STATISTIC(NumHoisted,
"Number of machine instructions hoisted out of loops");
"Number of hoisted machine instructions CSEed");
STATISTIC(NumPostRAHoisted,
"Number of machine instructions hoisted out of loops post regalloc");
+STATISTIC(NumStoreConst,
+ "Number of stores of const phys reg hoisted out of loops");
namespace {
MachineInstr *MI = &*MII;
if (!Hoist(MI, Preheader))
UpdateRegPressure(MI);
+ // If we have hoisted an instruction that may store, it can only be a
+ // constant store.
+ else if (MI->mayStore())
+ NumStoreConst++;
MII = NextMII;
}
return false;
}
+// This function iterates through all the operands of the input store MI and
+// checks that each register operand statisfies isCallerPreservedPhysReg.
+// This means, the value being stored and the address where it is being stored
+// is constant throughout the body of the function (not including prologue and
+// epilogue). When called with an MI that isn't a store, it returns false.
+static bool isInvariantStore(const MachineInstr &MI,
+ const TargetRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI) {
+
+ if (!MI.mayStore() || MI.hasUnmodeledSideEffects() ||
+ (MI.getNumOperands() == 0))
+ return false;
+
+ // Check that all register operands are caller-preserved physical registers.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ // If operand is a virtual register, check if it comes from a copy of a
+ // physical register.
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ Reg = TRI->lookThruCopyLike(MO.getReg(), MRI);
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (!TRI->isCallerPreservedPhysReg(Reg, *MI.getMF()))
+ return false;
+ }
+ }
+ return true;
+}
+
+// Return true if the input MI is a copy instruction that feeds an invariant
+// store instruction. This means that the src of the copy has to satisfy
+// isCallerPreservedPhysReg and atleast one of it's users should satisfy
+// isInvariantStore.
+static bool isCopyFeedingInvariantStore(const MachineInstr &MI,
+ const MachineRegisterInfo *MRI,
+ const TargetRegisterInfo *TRI) {
+
+ // FIXME: If targets would like to look through instructions that aren't
+ // pure copies, this can be updated to a query.
+ if (!MI.isCopy())
+ return false;
+
+ const MachineFunction *MF = MI.getMF();
+ // Check that we are copying a constant physical register.
+ unsigned CopySrcReg = MI.getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(CopySrcReg))
+ return false;
+
+ if (!TRI->isCallerPreservedPhysReg(CopySrcReg, *MF))
+ return false;
+
+ unsigned CopyDstReg = MI.getOperand(0).getReg();
+ // Check if any of the uses of the copy are invariant stores.
+ assert (TargetRegisterInfo::isVirtualRegister(CopyDstReg) &&
+ "copy dst is not a virtual reg");
+
+ for (MachineInstr &UseMI : MRI->use_instructions(CopyDstReg)) {
+ if (UseMI.mayStore() && isInvariantStore(UseMI, TRI, MRI))
+ return true;
+ }
+ return false;
+}
+
/// Returns true if the instruction may be a suitable candidate for LICM.
/// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
// Check if it's safe to move the instruction.
bool DontMoveAcrossStore = true;
- if (!I.isSafeToMove(AA, DontMoveAcrossStore))
+ if ((!I.isSafeToMove(AA, DontMoveAcrossStore)) &&
+ !(HoistConstStores && isInvariantStore(I, TRI, MRI))) {
return false;
+ }
// If it is load then check if it is guaranteed to execute by making sure that
// it dominates all exiting blocks. If it doesn't, then there is a path out of
// - When hoisting the last use of a value in the loop, that value no longer
// needs to be live in the loop. This lowers register pressure in the loop.
+ if (HoistConstStores && isCopyFeedingInvariantStore(MI, MRI, TRI))
+ return true;
+
bool CheapInstr = IsCheapInstruction(MI);
bool CreatesCopy = HasLoopPHIUse(&MI);
return getRegSizeInBits(*RC);
}
+unsigned
+TargetRegisterInfo::lookThruCopyLike(unsigned SrcReg,
+ const MachineRegisterInfo *MRI) const {
+ while (true) {
+ const MachineInstr *MI = MRI->getVRegDef(SrcReg);
+ if (!MI->isCopyLike())
+ return SrcReg;
+
+ unsigned CopySrcReg;
+ if (MI->isCopy())
+ CopySrcReg = MI->getOperand(1).getReg();
+ else {
+ assert(MI->isSubregToReg() && "Bad opcode for lookThruCopyLike");
+ CopySrcReg = MI->getOperand(2).getReg();
+ }
+
+ if (!isVirtualRegister(CopySrcReg))
+ return CopySrcReg;
+
+ SrcReg = CopySrcReg;
+ }
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
void TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,
return false;
}
-unsigned PPCInstrInfo::lookThruCopyLike(unsigned SrcReg,
- const MachineRegisterInfo *MRI) {
- while (true) {
- MachineInstr *MI = MRI->getVRegDef(SrcReg);
- if (!MI->isCopyLike())
- return SrcReg;
-
- unsigned CopySrcReg;
- if (MI->isCopy())
- CopySrcReg = MI->getOperand(1).getReg();
- else {
- assert(MI->isSubregToReg() && "Bad opcode for lookThruCopyLike");
- CopySrcReg = MI->getOperand(2).getReg();
- }
-
- if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
- return CopySrcReg;
-
- SrcReg = CopySrcReg;
- }
-}
-
// Essentially a compile-time implementation of a compare->isel sequence.
// It takes two constants to compare, along with the true/false registers
// and the comparison type (as a subreg to a CR field) and returns one
ConstOp = ~0U;
MachineInstr *DefMI = nullptr;
MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
// If we'ere in SSA, get the defs through the MRI. Otherwise, only look
// within the basic block to see if the register is defined using an LI/LI8.
if (MRI->isSSA()) {
unsigned Reg = MI.getOperand(i).getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- unsigned TrueReg = lookThruCopyLike(Reg, MRI);
+ unsigned TrueReg = TRI->lookThruCopyLike(Reg, MRI);
if (TargetRegisterInfo::isVirtualRegister(TrueReg)) {
DefMI = MRI->getVRegDef(TrueReg);
if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8) {
MachineInstr **KilledDef = nullptr) const;
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const;
- // This is used to find the "true" source register for n
- // Machine instruction. Returns the original SrcReg unless it is the target
- // of a copy-like operation, in which case we chain backwards through all
- // such operations to the ultimate source register. If a
- // physical register is encountered, we stop the search.
- static unsigned lookThruCopyLike(unsigned SrcReg,
- const MachineRegisterInfo *MRI);
bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III) const;
};
bool Simplified = false;
MachineInstr* ToErase = nullptr;
std::map<MachineInstr *, bool> TOCSaves;
-
+ const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
NumFunctionsEnteredInMIPeephole++;
if (ConvertRegReg) {
// Fixed-point conversion of reg/reg instructions fed by load-immediate
// We have to look through chains of COPY and SUBREG_TO_REG
// to find the real source values for comparison.
unsigned TrueReg1 =
- TII->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
+ TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
unsigned TrueReg2 =
- TII->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
+ TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
if (TrueReg1 == TrueReg2
&& TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
return false;
unsigned DefReg =
- TII->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
+ TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
if (DefOpc == PPC::XXPERMDI) {
unsigned FeedImmed = DefMI->getOperand(3).getImm();
unsigned FeedReg1 =
- TII->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
+ TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
unsigned FeedReg2 =
- TII->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
+ TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
DEBUG(dbgs()
unsigned MyOpcode = MI.getOpcode();
unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
unsigned TrueReg =
- TII->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
+ TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
break;
MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
case PPC::XVCVDPSP: {
// If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
unsigned TrueReg =
- TII->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
+ TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
break;
MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
// values.
if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
unsigned DefsReg1 =
- TII->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
+ TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
unsigned DefsReg2 =
- TII->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
+ TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
!TargetRegisterInfo::isVirtualRegister(DefsReg2))
break;
EnableGPRToVecSpills("ppc-enable-gpr-to-vsr-spills", cl::Hidden, cl::init(false),
cl::desc("Enable spills from gpr to vsr rather than stack"));
+static cl::opt<bool>
+StackPtrConst("ppc-stack-ptr-caller-preserved",
+ cl::desc("Consider R1 caller preserved so stack saves of "
+ "caller preserved registers can be LICM candidates"),
+ cl::init(true), cl::Hidden);
+
PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
: PPCGenRegisterInfo(TM.isPPC64() ? PPC::LR8 : PPC::LR,
TM.isPPC64() ? 0 : 1,
bool PPCRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg,
const MachineFunction &MF) const {
assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
- if (TM.isELFv2ABI() && PhysReg == PPC::X2) {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!TM.isPPC64())
+ return false;
+
+ if (!Subtarget.isSVR4ABI())
+ return false;
+ if (PhysReg == PPC::X2)
// X2 is guaranteed to be preserved within a function if it is reserved.
// The reason it's reserved is that it's the TOC pointer (and the function
// uses the TOC). In functions where it isn't reserved (i.e. leaf functions
// with no TOC access), we can't claim that it is preserved.
return (getReservedRegs(MF).test(PPC::X2));
- } else {
- return false;
- }
+ if (StackPtrConst && (PhysReg == PPC::X1) && !MFI.hasVarSizedObjects()
+ && !MFI.hasOpaqueSPAdjustment())
+ // The value of the stack pointer does not change within a function after
+ // the prologue and before the epilogue if there are no dynamic allocations
+ // and no inline asm which clobbers X1.
+ return true;
+ return false;
}
unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
; CHECK: bctrl
; CHECK: ld 2, 40(1)
-; CHECK: addis [[REG1:[0-9]+]], 2, .LC0@toc@ha
; CHECK: std 2, 40(1)
-; CHECK: ld {{[0-9]+}}, .LC0@toc@l([[REG1]])
; CHECK: {{mr|ld}} 2,
; CHECK: mtctr
; CHECK: bctrl
--- /dev/null
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -hoist-const-stores -ppc-stack-ptr-caller-preserved < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -hoist-const-stores -ppc-stack-ptr-caller-preserved < %s | FileCheck %s -check-prefix=CHECKBE
+
+; Test hoist out of single loop
+define signext i32 @test1(i32 signext %lim, i32 (i32)* nocapture %Func) {
+entry:
+; CHECK-LABEL: test1
+; CHECK: for.body.preheader
+; CHECK: std 2, 24(1)
+; CHECK: for.body
+; CHECK-NOT: std 2, 24(1)
+; CHECKBE-LABEL: test1
+; CHECKBE: for.body.preheader
+; CHECKBE: std 2, 40(1)
+; CHECKBE: for.body
+; CHECKBE-NOT: std 2, 40(1)
+
+ %cmp6 = icmp sgt i32 %lim, 0
+ br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %Sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ ret i32 %Sum.0.lcssa
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %Sum.07 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+ %call = tail call signext i32 %Func(i32 signext %i.08)
+ %add = add nsw i32 %call, %Sum.07
+ %inc = add nuw nsw i32 %i.08, 1
+ %exitcond = icmp eq i32 %inc, %lim
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; Test hoist of nested loop goes to outter loop preheader
+define signext i32 @test2(i32 signext %lim, i32 (i32)* nocapture %Func) {
+entry:
+; CHECK-LABEL: test2
+; CHECK: for.body4.lr.ph.preheader
+; CHECK: std 2, 24(1)
+; CHECK: for.body4.lr.ph
+; CHECK-NOT: std 2, 24(1)
+; CHECKBE-LABEL: test2
+; CHECKBE: for.body4.lr.ph.preheader
+; CHECKBE: std 2, 40(1)
+; CHECKBE: for.body4.lr.ph
+; CHECKBE-NOT: std 2, 40(1)
+
+ %cmp20 = icmp sgt i32 %lim, 0
+ br i1 %cmp20, label %for.body4.lr.ph.preheader, label %for.cond.cleanup
+
+for.body4.lr.ph.preheader: ; preds = %entry
+ br label %for.body4.lr.ph
+
+for.cond.cleanup: ; preds = %for.cond.cleanup3, %entry
+ %Sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.cond.cleanup3 ]
+ ret i32 %Sum.0.lcssa
+
+for.body4.lr.ph: ; preds = %for.body4.lr.ph.preheader, %for.cond.cleanup3
+ %j.022 = phi i32 [ %inc6, %for.cond.cleanup3 ], [ 0, %for.body4.lr.ph.preheader ]
+ %Sum.021 = phi i32 [ %add, %for.cond.cleanup3 ], [ 0, %for.body4.lr.ph.preheader ]
+ br label %for.body4
+
+for.cond.cleanup3: ; preds = %for.body4
+ %inc6 = add nuw nsw i32 %j.022, 1
+ %exitcond24 = icmp eq i32 %inc6, %lim
+ br i1 %exitcond24, label %for.cond.cleanup, label %for.body4.lr.ph
+
+for.body4: ; preds = %for.body4, %for.body4.lr.ph
+ %i.019 = phi i32 [ %j.022, %for.body4.lr.ph ], [ %inc, %for.body4 ]
+ %Sum.118 = phi i32 [ %Sum.021, %for.body4.lr.ph ], [ %add, %for.body4 ]
+ %call = tail call signext i32 %Func(i32 signext %i.019)
+ %add = add nsw i32 %call, %Sum.118
+ %inc = add nuw nsw i32 %i.019, 1
+ %exitcond = icmp eq i32 %inc, %lim
+ br i1 %exitcond, label %for.cond.cleanup3, label %for.body4
+}
+
+; Test hoist out of if statement with low branch probability
+; FIXME: we shouldn't hoist in such cases as it could increase the number
+; of stores after hoisting.
+define signext i32 @test3(i32 signext %lim, i32 (i32)* nocapture %Func) {
+entry:
+; CHECK-LABEL: test3
+; CHECK: %for.body.lr.ph
+; CHECK: std 2, 24(1)
+; CHECK: %for.body
+; CHECK-NOT: std 2, 24(1)
+; CHECKBE-LABEL: test3
+; CHECKBE: %for.body.lr.ph
+; CHECKBE: std 2, 40(1)
+; CHECKBE: %for.body
+; CHECKBE-NOT: std 2, 40(1)
+
+ %cmp13 = icmp sgt i32 %lim, 0
+ br i1 %cmp13, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph: ; preds = %entry
+ %sub = add nsw i32 %lim, -1
+ br label %for.body
+
+for.cond.cleanup: ; preds = %if.end, %entry
+ %Sum.0.lcssa = phi i32 [ 0, %entry ], [ %add3, %if.end ]
+ ret i32 %Sum.0.lcssa
+
+for.body: ; preds = %if.end, %for.body.lr.ph
+ %i.015 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %if.end ]
+ %Sum.014 = phi i32 [ 0, %for.body.lr.ph ], [ %add3, %if.end ]
+ %cmp1 = icmp eq i32 %i.015, %sub
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then: ; preds = %for.body
+ %call = tail call signext i32 %Func(i32 signext %sub)
+ %add = add nsw i32 %call, %Sum.014
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body
+ %Sum.1 = phi i32 [ %add, %if.then ], [ %Sum.014, %for.body ]
+ %call2 = tail call signext i32 @func(i32 signext %i.015)
+ %add3 = add nsw i32 %call2, %Sum.1
+ %inc = add nuw nsw i32 %i.015, 1
+ %exitcond = icmp eq i32 %inc, %lim
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+declare signext i32 @func(i32 signext)
; INVFUNCDESC-DAG: ld [[REG3:[0-9]+]], 0(3)
; INVFUNCDESC: %for.body
-; INVFUNCDESC: std 2, 40(1)
; INVFUNCDESC-DAG: mtctr [[REG3]]
; INVFUNCDESC-DAG: mr 11, [[REG2]]
; INVFUNCDESC-DAG: mr 2, [[REG1]]
; NONINVFUNCDESC-LABEL: @bar
; NONINVFUNCDESC: %for.body
-; NONINVFUNCDESC: std 2, 40(1)
; NONINVFUNCDESC-DAG: ld 3, 0(30)
; NONINVFUNCDESC-DAG: ld 11, 16(30)
; NONINVFUNCDESC-DAG: ld 2, 8(30)