bool runOnMachineFunction(MachineFunction &MF) override;
+ // Call determineCalleeSaves and then also set the bits for subregs and
+ // fully saved superregs.
+ static void computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF);
+
static char ID;
};
} // end of anonymous namespace
LLVM_DEBUG(dbgs() << "Clobbered Registers: ");
+ BitVector SavedRegs;
+ computeCalleeSavedRegs(SavedRegs, MF);
+
const BitVector &UsedPhysRegsMask = MRI->getUsedPhysRegsMask();
auto SetRegAsDefined = [&RegMask] (unsigned Reg) {
RegMask[Reg / 32] &= ~(1u << Reg % 32);
// Scan all the physical registers. When a register is defined in the current
// function set it and all the aliasing registers as defined in the regmask.
for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
+ // Don't count registers that are saved and restored.
+ if (SavedRegs.test(PReg))
+ continue;
// If a register is defined by an instruction mark it as defined together
- // with all it's aliases.
+ // with all its unsaved aliases.
if (!MRI->def_empty(PReg)) {
for (MCRegAliasIterator AI(PReg, TRI, true); AI.isValid(); ++AI)
- SetRegAsDefined(*AI);
+ if (!SavedRegs.test(*AI))
+ SetRegAsDefined(*AI);
continue;
}
// If a register is in the UsedPhysRegsMask set then mark it as defined.
SetRegAsDefined(PReg);
}
- if (!TargetFrameLowering::isSafeForNoCSROpt(F)) {
- const uint32_t *CallPreservedMask =
- TRI->getCallPreservedMask(MF, F.getCallingConv());
- if (CallPreservedMask) {
- // Set callee saved register as preserved.
- for (unsigned i = 0; i < RegMaskSize; ++i)
- RegMask[i] = RegMask[i] | CallPreservedMask[i];
- }
- } else {
+ if (TargetFrameLowering::isSafeForNoCSROpt(F)) {
++NumCSROpt;
LLVM_DEBUG(dbgs() << MF.getName()
<< " function optimized for not having CSR.\n");
return false;
}
+
+void RegUsageInfoCollector::
+computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) {
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ // Reset SavedRegs, then let the target report the registers it saves/restores.
+ SavedRegs.clear();
+ TFI->determineCalleeSaves(MF, SavedRegs);
+
+ // Insert the subregs of every callee-saved register that is marked as saved.
+ const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
+ for (unsigned i = 0; CSRegs[i]; ++i) { // Stops at the first zero entry.
+ unsigned Reg = CSRegs[i];
+ if (SavedRegs.test(Reg))
+ for (MCSubRegIterator SR(Reg, TRI, /*IncludeSelf=*/false); SR.isValid(); ++SR)
+ SavedRegs.set(*SR);
+ }
+
+ // Insert any not-yet-saved register that is fully saved via its subregisters.
+ for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
+ if (SavedRegs.test(PReg))
+ continue;
+
+ // PReg counts as covered if any class containing it is CoveredBySubRegs.
+ bool CoveredBySubRegs = false;
+ for (const TargetRegisterClass *RC : TRI->regclasses())
+ if (RC->CoveredBySubRegs && RC->contains(PReg)) {
+ CoveredBySubRegs = true;
+ break;
+ }
+ if (!CoveredBySubRegs)
+ continue;
+
+ // Add PReg to SavedRegs only if every one of its subregs is saved.
+ bool AllSubRegsSaved = true;
+ for (MCSubRegIterator SR(PReg, TRI, /*IncludeSelf=*/false); SR.isValid(); ++SR)
+ if (!SavedRegs.test(*SR)) {
+ AllSubRegsSaved = false;
+ break;
+ }
+ if (AllSubRegsSaved)
+ SavedRegs.set(PReg);
+ }
+}
--- /dev/null
+; Test that the updated regmask on the call to @fun1 preserves %r14 and
+; %r15. @fun1 will save and restore these registers since it contains a call.
+;
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -enable-ipra -print-regmask-num-regs=-1 \
+; RUN: -debug-only=ip-regalloc 2>&1 < %s | FileCheck --check-prefix=DBG %s
+; REQUIRES: asserts
+;
+; DBG: fun1 function optimized for not having CSR
+; DBG: Call Instruction After Register Usage Info Propagation : CallBRASL @fun1{{.*}} $r14d $r15d
+
+declare dso_local fastcc signext i32 @foo(i16*, i32 signext) unnamed_addr
+
+define internal fastcc void @fun1(i16* %arg, i16* nocapture %arg1) unnamed_addr #0 {
+bb:
+ %tmp = load i16, i16* undef, align 2
+ %tmp2 = shl i16 %tmp, 4
+ %tmp3 = tail call fastcc signext i32 @foo(i16* nonnull %arg, i32 signext 5) ; This call is what makes @fun1 save/restore %r14 and %r15.
+ %tmp4 = or i16 0, %tmp2
+ %tmp5 = or i16 %tmp4, 0
+ store i16 %tmp5, i16* undef, align 2
+ %tmp6 = getelementptr inbounds i16, i16* %arg, i64 5
+ %tmp7 = load i16, i16* %tmp6, align 2
+ store i16 %tmp7, i16* %arg1, align 2
+ ret void
+}
+
+define fastcc void @fun0(i8* nocapture readonly %arg, i16* nocapture %arg1, i32 signext %arg2) unnamed_addr {
+bb:
+ %a = alloca i8, i64 undef
+ call fastcc void @fun1(i16* nonnull undef, i16* %arg1) ; Call site whose propagated regmask the DBG line checks.
+ ret void
+}
+
+attributes #0 = { norecurse nounwind "no-frame-pointer-elim"="false" }