VirtReg2IndexFunctor>
VRegInfo;
+ /// The flag is true upon \p UpdatedCSRs initialization
+ /// and false otherwise.
+ bool IsUpdatedCSRsInitizialied;
+
+ /// Contains the updated callee saved register list.
+ /// As opposed to the static list defined in register info,
+ /// all registers that were disabled (via disableCalleeSavedRegister)
+ /// are removed from the list.
+ SmallVector<MCPhysReg, 16> UpdatedCSRs;
+
/// RegAllocHints - This vector records register allocation hints for virtual
/// registers. For each virtual register, it keeps a register and hint type
/// pair making up the allocation hint. Hint type is target specific except
// Register Info
//===--------------------------------------------------------------------===//
+ /// Disables the register from the list of CSRs.
+ /// I.e. the register will not appear as part of the CSR mask.
+ /// \see UpdatedCSRs.
+ void disableCalleeSavedRegister(unsigned Reg);
+
+ /// Returns list of callee saved registers.
+ /// The function returns the updated CSR list (after taking into account
+ /// registers that are disabled from the CSR list).
+ const MCPhysReg *getCalleeSavedRegs() const;
+
// Strictly for use by MachineInstr.cpp.
void addRegOperandToUseList(MachineOperand *MO);
// Callee saved registers of last MF. Assumed to be valid until the next
// runOnFunction() call.
- const MCPhysReg *CalleeSaved = nullptr;
+ // Used only to determine if an update was made to CalleeSavedAliases.
+ const MCPhysReg *CalleeSavedRegs = nullptr;
- // Map register number to CalleeSaved index + 1;
- SmallVector<uint8_t, 4> CSRNum;
+ // Map register alias to the callee saved Register.
+ SmallVector<MCPhysReg, 4> CalleeSavedAliases;
// Reserved registers in the current MF.
BitVector Reserved;
}
/// getLastCalleeSavedAlias - Returns the last callee saved register that
- /// overlaps PhysReg, or 0 if Reg doesn't overlap a CSR.
+ /// overlaps PhysReg, or 0 if PhysReg doesn't overlap any CSR.
unsigned getLastCalleeSavedAlias(unsigned PhysReg) const {
assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
- if (unsigned N = CSRNum[PhysReg])
- return CalleeSaved[N-1];
+ if (PhysReg < CalleeSavedAliases.size())
+ return CalleeSavedAliases[PhysReg];
return 0;
}
/// this target. The register should be in the order of desired callee-save
/// stack frame offset. The first register is closest to the incoming stack
/// pointer if stack grows down, and vice versa.
- ///
+ /// Notice: This function does not take into account disabled CSRs.
+ /// In most cases you will want to use
+ /// MachineRegisterInfo::getCalleeSavedRegs() instead.
virtual const MCPhysReg*
getCalleeSavedRegs(const MachineFunction *MF) const = 0;
// callee-saved register that is not saved in the prolog.
const MachineFrameInfo &MFI = MF.getFrameInfo();
BitVector Pristine = MFI.getPristineRegs(MF);
- for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
+ for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I;
+ ++I) {
unsigned Reg = *I;
if (!IsReturnBlock && !Pristine.test(Reg)) continue;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
// callee-saved register that is not saved in the prolog.
const MachineFrameInfo &MFI = MF.getFrameInfo();
BitVector Pristine = MFI.getPristineRegs(MF);
- for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
+ for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I;
+ ++I) {
if (!IsReturnBlock && !Pristine.test(*I)) continue;
for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
unsigned Reg = *AI;
static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF,
const MachineFrameInfo &MFI,
const TargetRegisterInfo &TRI) {
- for (const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR;
+ ++CSR)
LiveRegs.addReg(*CSR);
for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
LiveRegs.removeReg(Info.getReg());
if (MBB.isReturnBlock()) {
// The return block has no successors whose live-ins we could merge
// below. So instead we add the callee saved registers manually.
- for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I)
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (const MCPhysReg *I = MRI.getCalleeSavedRegs(); *I; ++I)
addReg(*I);
} else {
addPristines(*this, MF, MFI, *TRI);
if (!isCalleeSavedInfoValid())
return BV;
- for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR;
+ ++CSR)
BV.set(*CSR);
// Saved CSRs are not pristine.
return getBlockAddress() == Other.getBlockAddress() &&
getOffset() == Other.getOffset();
case MachineOperand::MO_RegisterMask:
- case MachineOperand::MO_RegisterLiveOut:
- return getRegMask() == Other.getRegMask();
+ case MachineOperand::MO_RegisterLiveOut: {
+   // Shallow compare first: operands that reference the very same mask array
+   // are trivially identical.
+   const uint32_t *RegMask = getRegMask();
+   const uint32_t *OtherRegMask = Other.getRegMask();
+   if (RegMask == OtherRegMask)
+     return true;
+
+   // The mask length is derived from the target's register count, which is
+   // only reachable through the owning MachineFunction. Walk the parent chain
+   // defensively: an operand that is not attached to an instruction, block
+   // and function must not be dereferenced through a null parent.
+   const MachineInstr *MI = getParent();
+   const MachineBasicBlock *MBB = MI ? MI->getParent() : nullptr;
+   const MachineFunction *MF = MBB ? MBB->getParent() : nullptr;
+   if (!MF)
+     // The mask size is unknown, so a deep compare is impossible; the masks
+     // are already known not to be pointer-identical.
+     return false;
+
+   // Calculate the size of the RegMask: one bit per register, packed into
+   // 32-bit words.
+   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+   unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
+
+   // Deep compare of the two RegMasks.
+   return std::equal(RegMask, RegMask + RegMaskSize, OtherRegMask);
+ }
case MachineOperand::MO_MCSymbol:
return getMCSymbol() == Other.getMCSymbol();
case MachineOperand::MO_CFIIndex:
void MachineRegisterInfo::Delegate::anchor() {}
MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF)
- : MF(MF), TracksSubRegLiveness(MF->getSubtarget().enableSubRegLiveness() &&
- EnableSubRegLiveness) {
+ : MF(MF), IsUpdatedCSRsInitizialied(false),
+ TracksSubRegLiveness(MF->getSubtarget().enableSubRegLiveness() &&
+ EnableSubRegLiveness) {
unsigned NumRegs = getTargetRegisterInfo()->getNumRegs();
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
}
return false;
}
+
+void MachineRegisterInfo::disableCalleeSavedRegister(unsigned Reg) {
+  const TargetRegisterInfo *RegInfo = getTargetRegisterInfo();
+  assert(Reg && (Reg < RegInfo->getNumRegs()) &&
+         "Trying to disable an invalid register");
+
+  // The first time a register is disabled, seed the per-function list with a
+  // copy of the target's static callee saved register list.
+  if (!IsUpdatedCSRsInitizialied) {
+    const MCPhysReg *StaticCSRs = RegInfo->getCalleeSavedRegs(MF);
+    while (*StaticCSRs)
+      UpdatedCSRs.push_back(*StaticCSRs++);
+
+    // A zero entry terminates the list; nothing may be pushed after it.
+    UpdatedCSRs.push_back(0);
+    IsUpdatedCSRsInitizialied = true;
+  }
+
+  // Drop Reg, together with every register that aliases it, from the list.
+  for (MCRegAliasIterator Alias(Reg, RegInfo, true); Alias.isValid();
+       ++Alias) {
+    auto NewEnd = std::remove(UpdatedCSRs.begin(), UpdatedCSRs.end(), *Alias);
+    UpdatedCSRs.erase(NewEnd, UpdatedCSRs.end());
+  }
+}
+
+const MCPhysReg *MachineRegisterInfo::getCalleeSavedRegs() const {
+  // No register has been disabled yet, so no per-function list exists:
+  // defer to the static list provided by the target register info.
+  if (!IsUpdatedCSRsInitizialied)
+    return getTargetRegisterInfo()->getCalleeSavedRegs(MF);
+
+  // Otherwise hand out the zero-terminated list with disabled registers
+  // already removed.
+  return UpdatedCSRs.data();
+}
return;
const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo();
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F);
+ const MCPhysReg *CSRegs = F.getRegInfo().getCalleeSavedRegs();
std::vector<CalleeSavedInfo> CSI;
for (unsigned i = 0; CSRegs[i]; ++i) {
static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI,
const MachineFunction &MF) {
- const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF);
+ const MCPhysReg *CSR = MF.getRegInfo().getCalleeSavedRegs();
for (unsigned i = 0; CSR[i] != 0; ++i)
if (TRI.regsOverlap(reg, CSR[i]))
return true;
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
// Does this MF have different CSRs?
assert(TRI && "no register info set");
- const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF);
- if (Update || CSR != CalleeSaved) {
- // Build a CSRNum map. Every CSR alias gets an entry pointing to the last
+
+ // Get the callee saved registers.
+ const MCPhysReg *CSR = MF->getRegInfo().getCalleeSavedRegs();
+ if (Update || CSR != CalleeSavedRegs) {
+ // Build a CSRAlias map. Every CSR alias saves the last
// overlapping CSR.
- CSRNum.clear();
- CSRNum.resize(TRI->getNumRegs(), 0);
- for (unsigned N = 0; unsigned Reg = CSR[N]; ++N)
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- CSRNum[*AI] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ...
+ // Reset every entry before rebuilding the map. resize() alone would keep
+ // the values computed for the previous function once the vector already
+ // has getNumRegs() entries, so registers that aliased a CSR there would
+ // still be reported as CSR aliases here.
+ CalleeSavedAliases.assign(TRI->getNumRegs(), 0);
+ for (const MCPhysReg *I = CSR; *I; ++I)
+   for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI)
+     CalleeSavedAliases[*AI] = *I;
+
Update = true;
}
- CalleeSaved = CSR;
+ CalleeSavedRegs = CSR;
// Different reserved registers?
const BitVector &RR = MF->getRegInfo().getReservedRegs();
unsigned Cost = TRI->getCostPerUse(PhysReg);
MinCost = std::min(MinCost, Cost);
- if (CSRNum[PhysReg])
+ if (CalleeSavedAliases[PhysReg])
// PhysReg aliases a CSR, save it for later.
CSRAlias.push_back(PhysReg);
else {
return;
// Get the callee saved register list...
- const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
+ const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
// Early exit if there are no callee saved registers.
if (!CSRegs || CSRegs[0] == 0)
++I, ++OutsIndex) {
CCValAssign &VA = RVLocs[I];
assert(VA.isRegLoc() && "Can only return in registers!");
+
+ // Remove the register from the list of callee saved registers.
+ if (CallConv == CallingConv::X86_RegCall)
+ MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
+
SDValue ValToCopy = OutVals[OutsIndex];
EVT ValVT = ValToCopy.getValueType();
assert(2 == RegsToPass.size() &&
"Expecting two registers after Pass64BitArgInRegs");
+
+ // Remove the second register from the list of callee saved registers.
+ if (CallConv == CallingConv::X86_RegCall)
+ MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
} else {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
}
// RAX/EAX now acts like a return value.
RetOps.push_back(
DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
+
+ // Remove the returned register from the list of callee saved registers.
+ if (CallConv == CallingConv::X86_RegCall)
+ MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
}
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
SDValue X86TargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
- SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
+ uint32_t *RegMask) const {
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
bool Is64Bit = Subtarget.is64Bit();
CCValAssign &VA = RVLocs[I];
EVT CopyVT = VA.getLocVT();
+ // In some calling conventions we need to remove the used registers
+ // from the register mask.
+ if (RegMask && CallConv == CallingConv::X86_RegCall) {
+ for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
+ }
+
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
}
}
+ if (CallConv == CallingConv::X86_RegCall) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (const auto &Pair : make_range(MRI.livein_begin(), MRI.livein_end()))
+ MF.getRegInfo().disableCalleeSavedRegister(Pair.first);
+ }
+
return Chain;
}
Mask = RegInfo->getNoPreservedMask();
}
- Ops.push_back(DAG.getRegisterMask(Mask));
+ // Define a new register mask from the existing mask.
+ uint32_t *RegMask = nullptr;
+
+ // In some calling conventions we need to remove the used physical registers
+ // from the reg mask.
+ if (CallConv == CallingConv::X86_RegCall) {
+ /// @todo Need to add support in MIPrinter and MIParser to represent
+ /// the customized RegMask.
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+
+ // Allocate a new Reg Mask and copy Mask.
+ RegMask = MF.allocateRegisterMask(TRI->getNumRegs());
+ unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
+ memcpy(RegMask, Mask, sizeof(uint32_t) * RegMaskSize);
+
+ // Make sure all sub registers of the argument registers are reset
+ // in the RegMask.
+ for (auto const &RegPair : RegsToPass)
+ for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
+
+ // Create the RegMask Operand according to our updated mask.
+ Ops.push_back(DAG.getRegisterMask(RegMask));
+ } else {
+ // Create the RegMask Operand according to the static mask.
+ Ops.push_back(DAG.getRegisterMask(Mask));
+ }
if (InFlag.getNode())
Ops.push_back(InFlag);
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
- Ins, dl, DAG, InVals);
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+ InVals, RegMask);
}
//===----------------------------------------------------------------------===//
// N.B. the order the invoke BBs are processed in doesn't matter here.
SmallVector<MachineBasicBlock *, 64> MBBLPads;
- const MCPhysReg *SavedRegs =
- Subtarget.getRegisterInfo()->getCalleeSavedRegs(MF);
+ const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
for (MachineBasicBlock *MBB : InvokeBBs) {
// Remove the landing pad successor from the invoke block and replace it
// with the new dispatch block.
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals,
+ uint32_t *RegMask) const;
SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<ISD::InputArg> &ArgInfo,
const SDLoc &dl, SelectionDAG &DAG,
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
MF->getSubtarget().getRegisterInfo());
unsigned SlotSize = RegInfo->getSlotSize();
- for (const MCPhysReg *CSR =
- RegInfo->X86RegisterInfo::getCalleeSavedRegs(MF);
- unsigned Reg = *CSR;
- ++CSR)
- {
+ for (const MCPhysReg *CSR = MF->getRegInfo().getCalleeSavedRegs();
+ unsigned Reg = *CSR; ++CSR) {
if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
RestoreBasePointerOffset -= SlotSize;
}
--- /dev/null
+; RUN: llc < %s -mtriple=i386-linux-gnu | FileCheck --check-prefix=CHECK %s
+
+declare x86_regcallcc i32 @callee(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0);
+
+; In RegCall calling convention, ESI and EDI are callee saved registers.
+; One might think that the caller could assume that ESI value is the same before
+; and after calling the callee.
+; However, RegCall also says that a register that was used for
+; passing/returning arguments can be assumed to be modified by the callee.
+; In other words, it is no longer a callee saved register.
+; In this case we want to see that EDX/ECX values are saved and EDI/ESI are assumed
+; to be modified by the callee.
+; This is a hipe CC function that doesn't save any register for the caller.
+; So we can be sure that there is no other reason to save EDX/ECX.
+; The caller arguments are expected to be passed (in the following order)
+; in registers: ESI, EBP, EAX, EDX and ECX.
+define cc 11 i32 @caller(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0) nounwind {
+ %b1 = call x86_regcallcc i32 @callee(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0)
+ %b2 = add i32 %b1, %d0
+ %b3 = add i32 %b2, %e0
+ ret i32 %b3
+}
+; CHECK-LABEL: caller
+; CHECK: subl $12, %esp
+; CHECK-NEXT: movl %ecx, 8(%esp)
+; CHECK-NEXT: movl %edx, %ebx
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: movl %ebp, %ecx
+; CHECK-NEXT: movl %ebx, %edi
+; CHECK-NEXT: movl 8(%esp), %ebp
+; CHECK-NEXT: movl %ebp, %esi
+; CHECK-NEXT: calll callee
+; CHECK-NEXT: leal (%eax,%ebx), %esi
+; CHECK-NEXT: addl %ebp, %esi
+; CHECK-NEXT: addl $12, %esp
+; CHECK-NEXT: retl
+
+!hipe.literals = !{ !0, !1, !2 }
+!0 = !{ !"P_NSP_LIMIT", i32 120 }
+!1 = !{ !"X86_LEAF_WORDS", i32 24 }
+!2 = !{ !"AMD64_LEAF_WORDS", i32 18 }
+
+; Make sure that the callee doesn't save registers that were used to pass its arguments.
+; The caller arguments are expected to be passed (in the following order)
+; in registers: EAX, ECX, EDX, EDI and ESI.
+; The result will return in EAX, ECX and EDX.
+define x86_regcallcc {i32, i32, i32} @test_callee(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0) nounwind {
+ %b1 = mul i32 7, %e0
+ %b2 = udiv i32 5, %e0
+ %b3 = mul i32 7, %d0
+ %b4 = insertvalue {i32, i32, i32} undef, i32 %b1, 0
+ %b5 = insertvalue {i32, i32, i32} %b4, i32 %b2, 1
+ %b6 = insertvalue {i32, i32, i32} %b5, i32 %b3, 2
+ ret {i32, i32, i32} %b6
+}
+; CHECK-LABEL: test_callee
+; CHECK-NOT: pushl %esi
+; CHECK-NOT: pushl %edi
+; CHECK: retl
ret <32 x float> %x4
}
-; X32-LABEL: pushl {{%e(si|di|bx|bp)}}
-; X32: pushl {{%e(si|di|bx|bp)}}
-; X32: pushl {{%e(si|di|bx|bp)}}
-; X32: pushl {{%e(si|di|bx|bp)}}
-; X32: popl {{%e(si|di|bx|bp)}}
-; X32: popl {{%e(si|di|bx|bp)}}
-; X32: popl {{%e(si|di|bx|bp)}}
-; X32: popl {{%e(si|di|bx|bp)}}
+; X32-LABEL: testi32_inp
+; X32: pushl {{%e(bx|bp)}}
+; X32: pushl {{%e(bx|bp)}}
+; X32: popl {{%e(bx|bp)}}
+; X32: popl {{%e(bx|bp)}}
; X32: retl
-; WIN64-LABEL: pushq {{%r(bp|bx|1[0-5])}}
+; WIN64-LABEL: testi32_inp
; WIN64: pushq {{%r(bp|bx|1[0-5])}}
; WIN64: pushq {{%r(bp|bx|1[0-5])}}
; WIN64: pushq {{%r(bp|bx|1[0-5])}}
; WIN64: popq {{%r(bp|bx|1[0-5])}}
; WIN64: popq {{%r(bp|bx|1[0-5])}}
; WIN64: popq {{%r(bp|bx|1[0-5])}}
-; WIN64: popq {{%r(bp|bx|1[0-5])}}
; WIN64: retq
-; LINUXOSX64-LABEL: pushq {{%r(bp|bx|1[2-5])}}
+; LINUXOSX64-LABEL: testi32_inp
; LINUXOSX64: pushq {{%r(bp|bx|1[2-5])}}
; LINUXOSX64: pushq {{%r(bp|bx|1[2-5])}}
; LINUXOSX64: popq {{%r(bp|bx|1[2-5])}}
; LINUXOSX64: popq {{%r(bp|bx|1[2-5])}}
-; LINUXOSX64: popq {{%r(bp|bx|1[2-5])}}
; LINUXOSX64: retq
; Test regcall when running multiple input parameters - callee saved GPRs
}
; WIN64-LABEL: testf32_inp
-; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill
-; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill
-; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill
-; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill
+; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
+; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
+; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
+; WIN64: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
; WIN64: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
-; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
-; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
-; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
-; WIN64: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
+; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
+; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
+; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
+; WIN64: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
; WIN64: retq
; WIN32-LABEL: testf32_inp
-; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill
-; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill
-; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill
-; WIN32: movaps {{%xmm([4-7])}}, {{.*(%ebp).*}} {{#+}} 16-byte Spill
+; WIN32: movaps {{%xmm([0-7])}}, {{.*(%e(b|s)p).*}} {{#+}} 16-byte Spill
; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}
; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}
; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}
; WIN32: {{.*}} {{%xmm[0-7]}}, {{%xmm[4-7]}}
-; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 16-byte Reload
-; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 16-byte Reload
-; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 16-byte Reload
-; WIN32: movaps {{.*(%ebp).*}}, {{%xmm([4-7])}} {{#+}} 16-byte Reload
+; WIN32: movaps {{.*(%e(b|s)p).*}}, {{%xmm([0-7])}} {{#+}} 16-byte Reload
; WIN32: retl
; LINUXOSX-LABEL: testf32_inp
-; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill
-; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill
-; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill
-; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%rsp).*}} {{#+}} 16-byte Spill
+; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
+; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
+; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
+; LINUXOSX: movaps {{%xmm(1[2-5])}}, {{.*(%r(b|s)p).*}} {{#+}} 16-byte Spill
; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
; LINUXOSX: {{.*}} {{%xmm([0-9]|1[0-1])}}, {{%xmm(1[2-5])}}
-; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
-; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
-; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
-; LINUXOSX: movaps {{.*(%rsp).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
+; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
+; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
+; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
+; LINUXOSX: movaps {{.*(%r(b|s)p).*}}, {{%xmm(1[2-5])}} {{#+}} 16-byte Reload
; LINUXOSX: retq
;test calling conventions - input parameters, callee saved XMMs
; WIN32-LABEL: testi32_inp
; WIN32: pushl {{%e(si|di|bx|bp)}}
; WIN32: pushl {{%e(si|di|bx|bp)}}
-; WIN32: pushl {{%e(si|di|bx|bp)}}
-; WIN32: pushl {{%e(si|di|bx|bp)}}
-; WIN32: popl {{%e(si|di|bx|bp)}}
-; WIN32: popl {{%e(si|di|bx|bp)}}
; WIN32: popl {{%e(si|di|bx|bp)}}
; WIN32: popl {{%e(si|di|bx|bp)}}
; WIN32: retl
; WIN64: pushq {{%r(bp|bx|1[0-5])}}
; WIN64: pushq {{%r(bp|bx|1[0-5])}}
; WIN64: pushq {{%r(bp|bx|1[0-5])}}
-; WIN64: pushq {{%r(bp|bx|1[0-5])}}
-; WIN64: pushq {{%r(bp|bx|1[0-5])}}
-; WIN64: popq {{%r(bp|bx|1[0-5])}}
-; WIN64: popq {{%r(bp|bx|1[0-5])}}
; WIN64: popq {{%r(bp|bx|1[0-5])}}
; WIN64: popq {{%r(bp|bx|1[0-5])}}
; WIN64: popq {{%r(bp|bx|1[0-5])}}
; LINUXOSX-LABEL: testi32_inp
; LINUXOSX: pushq {{%r(bp|bx|1[2-5])}}
; LINUXOSX: pushq {{%r(bp|bx|1[2-5])}}
-; LINUXOSX: pushq {{%r(bp|bx|1[2-5])}}
-; LINUXOSX: pushq {{%r(bp|bx|1[2-5])}}
-; LINUXOSX: popq {{%r(bp|bx|1[2-5])}}
-; LINUXOSX: popq {{%r(bp|bx|1[2-5])}}
; LINUXOSX: popq {{%r(bp|bx|1[2-5])}}
; LINUXOSX: popq {{%r(bp|bx|1[2-5])}}
; LINUXOSX: retq