From 6095a7948d32d712811e8c2ab2190acf8e66a8bc Mon Sep 17 00:00:00 2001 From: Oren Ben Simhon Date: Tue, 14 Mar 2017 09:09:26 +0000 Subject: [PATCH] Disable Callee Saved Registers MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Each Calling convention (CC) defines a static list of registers that should be preserved by a callee function. All other registers should be saved by the caller. Some CCs use additional condition: If the register is used for passing/returning arguments – the caller needs to save it - even if it is part of the Callee Saved Registers (CSR) list. The current LLVM implementation doesn’t support it. It will save a register if it is part of the static CSR list and will not care if the register is passed/returned by the callee. The solution is to dynamically allocate the CSR lists (Only for these CCs). The lists will be updated with actual registers that should be saved by the callee. Since we need the allocated lists to live as long as the function exists, the list should reside inside the Machine Register Info (MRI) which is a property of the Machine Function and managed by it (and has the same life span). The lists should be saved in the MRI and populated upon LowerCall and LowerFormalArguments. The patch will also assist to implement future no_caller_saved_regsiters attribute intended for interrupt handler CC. Differential Revision: https://reviews.llvm.org/D28566 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@297715 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/MachineRegisterInfo.h | 20 ++++++++ include/llvm/CodeGen/RegisterClassInfo.h | 13 ++--- include/llvm/Target/TargetRegisterInfo.h | 4 +- lib/CodeGen/AggressiveAntiDepBreaker.cpp | 3 +- lib/CodeGen/CriticalAntiDepBreaker.cpp | 3 +- lib/CodeGen/LivePhysRegs.cpp | 7 ++- lib/CodeGen/MachineFunction.cpp | 4 +- lib/CodeGen/MachineInstr.cpp | 17 ++++++- lib/CodeGen/MachineRegisterInfo.cpp | 36 ++++++++++++- lib/CodeGen/PrologEpilogInserter.cpp | 2 +- lib/CodeGen/RegAllocPBQP.cpp | 2 +- lib/CodeGen/RegisterClassInfo.cpp | 23 +++++---- lib/CodeGen/TargetFrameLoweringImpl.cpp | 2 +- lib/Target/X86/X86ISelLowering.cpp | 67 ++++++++++++++++++++++--- lib/Target/X86/X86ISelLowering.h | 3 +- lib/Target/X86/X86MachineFunctionInfo.cpp | 8 ++- test/CodeGen/X86/DynamicCalleeSavedRegisters.ll | 60 ++++++++++++++++++++++ test/CodeGen/X86/avx512-regcall-NoMask.ll | 19 +++---- test/CodeGen/X86/sse-regcall.ll | 54 +++++++------------- 19 files changed, 258 insertions(+), 89 deletions(-) create mode 100644 test/CodeGen/X86/DynamicCalleeSavedRegisters.ll diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h index eeada3c886a..1b956db060b 100644 --- a/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/include/llvm/CodeGen/MachineRegisterInfo.h @@ -73,6 +73,16 @@ private: VirtReg2IndexFunctor> VRegInfo; + /// The flag is true upon \p UpdatedCSRs initialization + /// and false otherwise. + bool IsUpdatedCSRsInitizialied; + + /// Contains the updated callee saved register list. + /// As opposed to the static list defined in register info, + /// all registers that were disabled (in CalleeSaveDisableRegs) + /// are removed from the list. + SmallVector UpdatedCSRs; + /// RegAllocHints - This vector records register allocation hints for virtual /// registers. For each virtual register, it keeps a register and hint type /// pair making up the allocation hint. Hint type is target specific except @@ -207,6 +217,16 @@ public: // Register Info //===--------------------------------------------------------------------===// + /// Disables the register from the list of CSRs. + /// I.e. the register will not appear as part of the CSR mask. + /// \see UpdatedCalleeSavedRegs. + void disableCalleeSavedRegister(unsigned Reg); + + /// Returns list of callee saved registers. + /// The function returns the updated CSR list (after taking into account + /// registers that are disabled from the CSR list). + const MCPhysReg *getCalleeSavedRegs() const; + // Strictly for use by MachineInstr.cpp. void addRegOperandToUseList(MachineOperand *MO); diff --git a/include/llvm/CodeGen/RegisterClassInfo.h b/include/llvm/CodeGen/RegisterClassInfo.h index f2c5972aa97..355c9f9b2f1 100644 --- a/include/llvm/CodeGen/RegisterClassInfo.h +++ b/include/llvm/CodeGen/RegisterClassInfo.h @@ -56,10 +56,11 @@ class RegisterClassInfo { // Callee saved registers of last MF. Assumed to be valid until the next // runOnFunction() call. - const MCPhysReg *CalleeSaved = nullptr; + // Used only to determine if an update was made to CalleeSavedAliases. + const MCPhysReg *CalleeSavedRegs = nullptr; - // Map register number to CalleeSaved index + 1; - SmallVector CSRNum; + // Map register alias to the callee saved Register. + SmallVector CalleeSavedAliases; // Reserved registers in the current MF. BitVector Reserved; @@ -108,11 +109,11 @@ public: } /// getLastCalleeSavedAlias - Returns the last callee saved register that - /// overlaps PhysReg, or 0 if Reg doesn't overlap a CSR. + /// overlaps PhysReg, or 0 if Reg doesn't overlap a CalleeSavedAliases. unsigned getLastCalleeSavedAlias(unsigned PhysReg) const { assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); - if (unsigned N = CSRNum[PhysReg]) - return CalleeSaved[N-1]; + if (PhysReg < CalleeSavedAliases.size()) + return CalleeSavedAliases[PhysReg]; return 0; } diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index 673cca7f44a..5a4f1a709ef 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -426,7 +426,9 @@ public: /// this target. The register should be in the order of desired callee-save /// stack frame offset. The first register is closest to the incoming stack /// pointer if stack grows down, and vice versa. - /// + /// Notice: This function does not take into account disabled CSRs. + /// In most cases you will want to use instead the function + /// getCalleeSavedRegs that is implemented in MachineRegisterInfo. virtual const MCPhysReg* getCalleeSavedRegs(const MachineFunction *MF) const = 0; diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index bb908618b67..a60a43a2ddc 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -163,7 +163,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // callee-saved register that is not saved in the prolog. const MachineFrameInfo &MFI = MF.getFrameInfo(); BitVector Pristine = MFI.getPristineRegs(MF); - for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { + for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I; + ++I) { unsigned Reg = *I; if (!IsReturnBlock && !Pristine.test(Reg)) continue; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 5d60c305545..615d7eb7b4d 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -71,7 +71,8 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // callee-saved register that is not saved in the prolog. const MachineFrameInfo &MFI = MF.getFrameInfo(); BitVector Pristine = MFI.getPristineRegs(MF); - for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { + for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I; + ++I) { if (!IsReturnBlock && !Pristine.test(*I)) continue; for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { unsigned Reg = *AI; diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp index a3bed3b76c0..9f7d7cf5484 100644 --- a/lib/CodeGen/LivePhysRegs.cpp +++ b/lib/CodeGen/LivePhysRegs.cpp @@ -160,7 +160,9 @@ void LivePhysRegs::addBlockLiveIns(const MachineBasicBlock &MBB) { static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF, const MachineFrameInfo &MFI, const TargetRegisterInfo &TRI) { - for (const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR) + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; + ++CSR) LiveRegs.addReg(*CSR); for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) LiveRegs.removeReg(Info.getReg()); @@ -179,7 +181,8 @@ void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) { if (MBB.isReturnBlock()) { // The return block has no successors whose live-ins we could merge // below. So instead we add the callee saved registers manually. - for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (const MCPhysReg *I = MRI.getCalleeSavedRegs(); *I; ++I) addReg(*I); } else { addPristines(*this, MF, MFI, *TRI); diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index ef5e032f1af..c9767a25e90 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -860,7 +860,9 @@ BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const { if (!isCalleeSavedInfoValid()) return BV; - for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR) + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; + ++CSR) BV.set(*CSR); // Saved CSRs are not pristine. diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 2acdc34150c..20ae830818a 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -262,8 +262,21 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { return getBlockAddress() == Other.getBlockAddress() && getOffset() == Other.getOffset(); case MachineOperand::MO_RegisterMask: - case MachineOperand::MO_RegisterLiveOut: - return getRegMask() == Other.getRegMask(); + case MachineOperand::MO_RegisterLiveOut: { + // Shallow compare of the two RegMasks + const uint32_t *RegMask = getRegMask(); + const uint32_t *OtherRegMask = Other.getRegMask(); + if (RegMask == OtherRegMask) + return true; + + // Calculate the size of the RegMask + const MachineFunction *MF = getParent()->getParent()->getParent(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32; + + // Deep compare of the two RegMasks + return std::equal(RegMask, RegMask + RegMaskSize, OtherRegMask); + } case MachineOperand::MO_MCSymbol: return getMCSymbol() == Other.getMCSymbol(); case MachineOperand::MO_CFIIndex: diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 2bb5b054066..8df0d7e0123 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -42,8 +42,9 @@ static cl::opt EnableSubRegLiveness("enable-subreg-liveness", cl::Hidden, void MachineRegisterInfo::Delegate::anchor() {} MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF) - : MF(MF), TracksSubRegLiveness(MF->getSubtarget().enableSubRegLiveness() && - EnableSubRegLiveness) { + : MF(MF), IsUpdatedCSRsInitizialied(false), + TracksSubRegLiveness(MF->getSubtarget().enableSubRegLiveness() && + EnableSubRegLiveness) { unsigned NumRegs = getTargetRegisterInfo()->getNumRegs(); VRegInfo.reserve(256); RegAllocHints.reserve(256); @@ -556,3 +557,34 @@ bool MachineRegisterInfo::isPhysRegUsed(unsigned PhysReg) const { } return false; } + +void MachineRegisterInfo::disableCalleeSavedRegister(unsigned Reg) { + + const TargetRegisterInfo *TRI = getTargetRegisterInfo(); + assert(Reg && (Reg < TRI->getNumRegs()) && + "Trying to disable an invalid register"); + + if (!IsUpdatedCSRsInitizialied) { + const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF); + for (const MCPhysReg *I = CSR; *I; ++I) + UpdatedCSRs.push_back(*I); + + // Zero value represents the end of the register list + // (no more registers should be pushed). + UpdatedCSRs.push_back(0); + + IsUpdatedCSRsInitizialied = true; + } + + // Remove the register (and its aliases from the list). + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + UpdatedCSRs.erase(std::remove(UpdatedCSRs.begin(), UpdatedCSRs.end(), *AI), + UpdatedCSRs.end()); +} + +const MCPhysReg *MachineRegisterInfo::getCalleeSavedRegs() const { + if (IsUpdatedCSRsInitizialied) + return UpdatedCSRs.data(); + + return getTargetRegisterInfo()->getCalleeSavedRegs(MF); +} diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 708f95c55b2..9f608957ca2 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -336,7 +336,7 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F, return; const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo(); - const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F); + const MCPhysReg *CSRegs = F.getRegInfo().getCalleeSavedRegs(); std::vector CSI; for (unsigned i = 0; CSRegs[i]; ++i) { diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 686abf99251..3b5964eef55 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -573,7 +573,7 @@ void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF, static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI, const MachineFunction &MF) { - const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF); + const MCPhysReg *CSR = MF.getRegInfo().getCalleeSavedRegs(); for (unsigned i = 0; CSR[i] != 0; ++i) if (TRI.regsOverlap(reg, CSR[i])) return true; diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index e61831b151d..82a3bd9a0bd 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -24,6 +24,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include @@ -56,18 +57,20 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { // Does this MF have different CSRs? assert(TRI && "no register info set"); - const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF); - if (Update || CSR != CalleeSaved) { - // Build a CSRNum map. Every CSR alias gets an entry pointing to the last + + // Get the callee saved registers. + const MCPhysReg *CSR = MF->getRegInfo().getCalleeSavedRegs(); + if (Update || CSR != CalleeSavedRegs) { + // Build a CSRAlias map. Every CSR alias saves the last // overlapping CSR. - CSRNum.clear(); - CSRNum.resize(TRI->getNumRegs(), 0); - for (unsigned N = 0; unsigned Reg = CSR[N]; ++N) - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - CSRNum[*AI] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ... + CalleeSavedAliases.resize(TRI->getNumRegs(), 0); + for (const MCPhysReg *I = CSR; *I; ++I) + for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) + CalleeSavedAliases[*AI] = *I; + Update = true; } - CalleeSaved = CSR; + CalleeSavedRegs = CSR; // Different reserved registers? const BitVector &RR = MF->getRegInfo().getReservedRegs(); @@ -111,7 +114,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { unsigned Cost = TRI->getCostPerUse(PhysReg); MinCost = std::min(MinCost, Cost); - if (CSRNum[PhysReg]) + if (CalleeSavedAliases[PhysReg]) // PhysReg aliases a CSR, save it for later. CSRAlias.push_back(PhysReg); else { diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp index f082add8c7d..e5def6752e0 100644 --- a/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -73,7 +73,7 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF, return; // Get the callee saved register list... - const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF); + const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs(); // Early exit if there are no callee saved registers. if (!CSRegs || CSRegs[0] == 0) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c2b16402319..81c78981b6f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2217,6 +2217,11 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, ++I, ++OutsIndex) { CCValAssign &VA = RVLocs[I]; assert(VA.isRegLoc() && "Can only return in registers!"); + + // Add the register to the CalleeSaveDisableRegs list. + if (CallConv == CallingConv::X86_RegCall) + MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg()); + SDValue ValToCopy = OutVals[OutsIndex]; EVT ValVT = ValToCopy.getValueType(); @@ -2291,6 +2296,10 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, assert(2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"); + + // Add the second register to the CalleeSaveDisableRegs list. + if (CallConv == CallingConv::X86_RegCall) + MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg()); } else { RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy)); } @@ -2347,6 +2356,10 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // RAX/EAX now acts like a return value. RetOps.push_back( DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); + + // Add the returned register to the CalleeSaveDisableRegs list. + if (CallConv == CallingConv::X86_RegCall) + MF.getRegInfo().disableCalleeSavedRegister(RetValReg); } const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); @@ -2526,8 +2539,10 @@ static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, SDValue X86TargetLowering::LowerCallResult( SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, - SelectionDAG &DAG, SmallVectorImpl &InVals) const { + SelectionDAG &DAG, SmallVectorImpl &InVals, + uint32_t *RegMask) const { + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); // Assign locations to each value returned by this call. SmallVector RVLocs; bool Is64Bit = Subtarget.is64Bit(); @@ -2541,6 +2556,14 @@ SDValue X86TargetLowering::LowerCallResult( CCValAssign &VA = RVLocs[I]; EVT CopyVT = VA.getLocVT(); + // In some calling conventions we need to remove the used registers + // from the register mask. + if (RegMask && CallConv == CallingConv::X86_RegCall) { + for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32)); + } + // If this is x86-64, and we disabled SSE, we can't return FP values if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) && ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) { @@ -3228,6 +3251,12 @@ SDValue X86TargetLowering::LowerFormalArguments( } } + if (CallConv == CallingConv::X86_RegCall) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (const auto &Pair : make_range(MRI.livein_begin(), MRI.livein_end())) + MF.getRegInfo().disableCalleeSavedRegister(Pair.first); + } + return Chain; } @@ -3744,7 +3773,34 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Mask = RegInfo->getNoPreservedMask(); } - Ops.push_back(DAG.getRegisterMask(Mask)); + // Define a new register mask from the existing mask. + uint32_t *RegMask = nullptr; + + // In some calling conventions we need to remove the used physical registers + // from the reg mask. + if (CallConv == CallingConv::X86_RegCall) { + /// @todo Need to add support in MIPrinter and MIParser to represent + /// the customed RegMask. + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + + // Allocate a new Reg Mask and copy Mask. + RegMask = MF.allocateRegisterMask(TRI->getNumRegs()); + unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32; + memcpy(RegMask, Mask, sizeof(uint32_t) * RegMaskSize); + + // Make sure all sub registers of the argument registers are reset + // in the RegMask. + for (auto const &RegPair : RegsToPass) + for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32)); + + // Create the RegMask Operand according to our updated mask. + Ops.push_back(DAG.getRegisterMask(RegMask)); + } else { + // Create the RegMask Operand according to the static mask. + Ops.push_back(DAG.getRegisterMask(Mask)); + } if (InFlag.getNode()) Ops.push_back(InFlag); @@ -3797,8 +3853,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Handle result values, copying them out of physregs into vregs that we // return. - return LowerCallResult(Chain, InFlag, CallConv, isVarArg, - Ins, dl, DAG, InVals); + return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, + InVals, RegMask); } //===----------------------------------------------------------------------===// @@ -26104,8 +26160,7 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, // N.B. the order the invoke BBs are processed in doesn't matter here. SmallVector MBBLPads; - const MCPhysReg *SavedRegs = - Subtarget.getRegisterInfo()->getCalleeSavedRegs(MF); + const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs(); for (MachineBasicBlock *MBB : InvokeBBs) { // Remove the landing pad successor from the invoke block and replace it // with the new dispatch block. diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index d9acc1357ec..73daf640a18 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -1087,7 +1087,8 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; + SmallVectorImpl &InVals, + uint32_t *RegMask) const; SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl &ArgInfo, const SDLoc &dl, SelectionDAG &DAG, diff --git a/lib/Target/X86/X86MachineFunctionInfo.cpp b/lib/Target/X86/X86MachineFunctionInfo.cpp index c9e636f1eb0..3fcb642424a 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.cpp +++ b/lib/Target/X86/X86MachineFunctionInfo.cpp @@ -9,6 +9,7 @@ #include "X86MachineFunctionInfo.h" #include "X86RegisterInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -20,11 +21,8 @@ void X86MachineFunctionInfo::setRestoreBasePointer(const MachineFunction *MF) { const X86RegisterInfo *RegInfo = static_cast( MF->getSubtarget().getRegisterInfo()); unsigned SlotSize = RegInfo->getSlotSize(); - for (const MCPhysReg *CSR = - RegInfo->X86RegisterInfo::getCalleeSavedRegs(MF); - unsigned Reg = *CSR; - ++CSR) - { + for (const MCPhysReg *CSR = MF->getRegInfo().getCalleeSavedRegs(); + unsigned Reg = *CSR; ++CSR) { if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) RestoreBasePointerOffset -= SlotSize; } diff --git a/test/CodeGen/X86/DynamicCalleeSavedRegisters.ll b/test/CodeGen/X86/DynamicCalleeSavedRegisters.ll new file mode 100644 index 00000000000..9dff4e596ca --- /dev/null +++ b/test/CodeGen/X86/DynamicCalleeSavedRegisters.ll @@ -0,0 +1,60 @@ +; RUN: llc < %s -mtriple=i386-linux-gnu | FileCheck --check-prefix=CHECK %s + +declare x86_regcallcc i32 @callee(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0); + +; In RegCall calling convention, ESI and EDI are callee saved registers. +; One might think that the caller could assume that ESI value is the same before +; and after calling the callee. +; However, RegCall also says that a register that was used for +; passing/returning argumnets, can be assumed to be modified by the callee. +; In other words, it is no longer a callee saved register. +; In this case we want to see that EDX/ECX values are saved and EDI/ESI are assumed +; to be modified by the callee. +; This is a hipe CC function that doesn't save any register for the caller. +; So we can be sure that there is no other reason to save EDX/ECX. +; The caller arguments are expected to be passed (in the following order) +; in registers: ESI, EBP, EAX, EDX and ECX. +define cc 11 i32 @caller(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0) nounwind { + %b1 = call x86_regcallcc i32 @callee(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0) + %b2 = add i32 %b1, %d0 + %b3 = add i32 %b2, %e0 + ret i32 %b3 +} +; CHECK-LABEL: caller +; CHECK: subl $12, %esp +; CHECK-NEXT: movl %ecx, 8(%esp) +; CHECK-NEXT: movl %edx, %ebx +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movl %ebp, %ecx +; CHECK-NEXT: movl %ebx, %edi +; CHECK-NEXT: movl 8(%esp), %ebp +; CHECK-NEXT: movl %ebp, %esi +; CHECK-NEXT: calll callee +; CHECK-NEXT: leal (%eax,%ebx), %esi +; CHECK-NEXT: addl %ebp, %esi +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: retl + +!hipe.literals = !{ !0, !1, !2 } +!0 = !{ !"P_NSP_LIMIT", i32 120 } +!1 = !{ !"X86_LEAF_WORDS", i32 24 } +!2 = !{ !"AMD64_LEAF_WORDS", i32 18 } + +; Make sure that the callee doesn't save parameters that were passed as arguments. +; The caller arguments are expected to be passed (in the following order) +; in registers: EAX, ECX, EDX, EDI and ESI. +; The result will return in EAX, ECX and EDX. +define x86_regcallcc {i32, i32, i32} @test_callee(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0) nounwind { + %b1 = mul i32 7, %e0 + %b2 = udiv i32 5, %e0 + %b3 = mul i32 7, %d0 + %b4 = insertvalue {i32, i32, i32} undef, i32 %b1, 0 + %b5 = insertvalue {i32, i32, i32} %b4, i32 %b2, 1 + %b6 = insertvalue {i32, i32, i32} %b5, i32 %b3, 2 + ret {i32, i32, i32} %b6 +} +; CHECK-LABEL: test_callee +; CHECK-NOT: pushl %esi +; CHECK-NOT: pushl %edi +; CHECK: retl diff --git a/test/CodeGen/X86/avx512-regcall-NoMask.ll b/test/CodeGen/X86/avx512-regcall-NoMask.ll index a2b058d0a40..33409791785 100644 --- a/test/CodeGen/X86/avx512-regcall-NoMask.ll +++ b/test/CodeGen/X86/avx512-regcall-NoMask.ll @@ -469,32 +469,27 @@ define x86_regcallcc <32 x float> @testf32_inp(<32 x float> %a, <32 x float> %b, ret <32 x float> %x4 } -; X32-LABEL: pushl {{%e(si|di|bx|bp)}} -; X32: pushl {{%e(si|di|bx|bp)}} -; X32: pushl {{%e(si|di|bx|bp)}} -; X32: pushl {{%e(si|di|bx|bp)}} -; X32: popl {{%e(si|di|bx|bp)}} -; X32: popl {{%e(si|di|bx|bp)}} -; X32: popl {{%e(si|di|bx|bp)}} -; X32: popl {{%e(si|di|bx|bp)}} +; X32-LABEL: testi32_inp +; X32: pushl {{%e(bx|bp)}} +; X32: pushl {{%e(bx|bp)}} +; X32: popl {{%e(bx|bp)}} +; X32: popl {{%e(bx|bp)}} ; X32: retl -; WIN64-LABEL: pushq {{%r(bp|bx|1[0-5])}} +; WIN64-LABEL: testi32_inp ; WIN64: pushq {{%r(bp|bx|1[0-5])}} ; WIN64: pushq {{%r(bp|bx|1[0-5])}} ; WIN64: pushq {{%r(bp|bx|1[0-5])}} ; WIN64: popq {{%r(bp|bx|1[0-5])}} ; WIN64: popq {{%r(bp|bx|1[0-5])}} ; WIN64: popq {{%r(bp|bx|1[0-5])}} -; WIN64: popq {{%r(bp|bx|1[0-5])}} ; WIN64: retq -; LINUXOSX64-LABEL: pushq {{%r(bp|bx|1[2-5])}} +; LINUXOSX64-LABEL: testi32_inp ; LINUXOSX64: pushq {{%r(bp|bx|1[2-5])}} ; LINUXOSX64: pushq {{%r(bp|bx|1[2-5])}} ; LINUXOSX64: popq {{%r(bp|bx|1[2-5])}} ; LINUXOSX64: popq {{%r(bp|bx|1[2-5])}} -; LINUXOSX64: popq {{%r(bp|bx|1[2-5])}} ; LINUXOSX64: retq ; Test regcall when running multiple input parameters - callee saved GPRs diff --git a/test/CodeGen/X86/sse-regcall.ll b/test/CodeGen/X86/sse-regcall.ll index b44e544d83c..862b9cc92f6 100644 --- a/test/CodeGen/X86/sse-regcall.ll +++ b/test/CodeGen/X86/sse-regcall.ll @@ -37,48 +37,42 @@ define x86_regcallcc i1 @test_CallargReti1(i1 %a) { } ; WIN64-LABEL: testf32_inp -; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill -; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill -; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill -; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill +; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill ; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} -; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload ; WIN64: retq ; WIN32-LABEL: testf32_inp -; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill -; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill -; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill -; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill +; WIN32: movaps {{%xmm([0-7])}}, {{.*(%e(b|s)p).*}} {{#+}} 16-byte Spill ; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}} ; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}} ; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}} ; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}} -; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 16-byte Reload -; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 16-byte Reload -; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 16-byte Reload -; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 16-byte Reload +; WIN32: movaps {{.*(%e(b|s)p).*}}, {{%xmm([0-7])}} {{#+}} 16-byte Reload ; WIN32: retl ; LINUXOSX-LABEL: testf32_inp -; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill -; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill -; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill -; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill +; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill +; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill ; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} ; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}} -; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload -; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload +; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload ; LINUXOSX: retq ;test calling conventions - input parameters, callee saved XMMs @@ -93,10 +87,6 @@ define x86_regcallcc <16 x float> @testf32_inp(<16 x float> %a, <16 x float> %b, ; WIN32-LABEL: testi32_inp ; WIN32: pushl {{%e(si|di|bx|bp)}} ; WIN32: pushl {{%e(si|di|bx|bp)}} -; WIN32: pushl {{%e(si|di|bx|bp)}} -; WIN32: pushl {{%e(si|di|bx|bp)}} -; WIN32: popl {{%e(si|di|bx|bp)}} -; WIN32: popl {{%e(si|di|bx|bp)}} ; WIN32: popl {{%e(si|di|bx|bp)}} ; WIN32: popl {{%e(si|di|bx|bp)}} ; WIN32: retl @@ -105,10 +95,6 @@ define x86_regcallcc <16 x float> @testf32_inp(<16 x float> %a, <16 x float> %b, ; WIN64: pushq {{%r(bp|bx|1[0-5])}} ; WIN64: pushq {{%r(bp|bx|1[0-5])}} ; WIN64: pushq {{%r(bp|bx|1[0-5])}} -; WIN64: pushq {{%r(bp|bx|1[0-5])}} -; WIN64: pushq {{%r(bp|bx|1[0-5])}} -; WIN64: popq {{%r(bp|bx|1[0-5])}} -; WIN64: popq {{%r(bp|bx|1[0-5])}} ; WIN64: popq {{%r(bp|bx|1[0-5])}} ; WIN64: popq {{%r(bp|bx|1[0-5])}} ; WIN64: popq {{%r(bp|bx|1[0-5])}} @@ -117,10 +103,6 @@ define x86_regcallcc <16 x float> @testf32_inp(<16 x float> %a, <16 x float> %b, ; LINUXOSX-LABEL: testi32_inp ; LINUXOSX: pushq {{%r(bp|bx|1[2-5])}} ; LINUXOSX: pushq {{%r(bp|bx|1[2-5])}} -; LINUXOSX: pushq {{%r(bp|bx|1[2-5])}} -; LINUXOSX: pushq {{%r(bp|bx|1[2-5])}} -; LINUXOSX: popq {{%r(bp|bx|1[2-5])}} -; LINUXOSX: popq {{%r(bp|bx|1[2-5])}} ; LINUXOSX: popq {{%r(bp|bx|1[2-5])}} ; LINUXOSX: popq {{%r(bp|bx|1[2-5])}} ; LINUXOSX: retq -- 2.11.0