From a1a416812afcad670f1704813e30ab7db752ef8e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 13 Sep 2017 23:47:01 +0000 Subject: [PATCH] AMDGPU: Don't spill SP reg like a normal CSR git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313217 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/SIFrameLowering.cpp | 9 +++++++++ lib/Target/AMDGPU/SIFrameLowering.h | 3 +++ lib/Target/AMDGPU/SIInstrInfo.cpp | 4 ++++ test/CodeGen/AMDGPU/byval-frame-setup.ll | 13 ++++++++----- test/CodeGen/AMDGPU/sibling-call.ll | 2 ++ 5 files changed, 26 insertions(+), 5 deletions(-) diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp index 2ecf32c6ffe..ff6fed88e37 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -594,6 +594,15 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( } } +void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + + // The SP is specifically managed and we don't want extra spills of it. 
+ SavedRegs.reset(MFI->getStackPtrOffsetReg()); +} + MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr( MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/lib/Target/AMDGPU/SIFrameLowering.h b/lib/Target/AMDGPU/SIFrameLowering.h index c23969d711b..cc1c85ff6bf 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.h +++ b/lib/Target/AMDGPU/SIFrameLowering.h @@ -35,6 +35,9 @@ public: int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS = nullptr) const override; + void processFunctionBeforeFrameFinalized( MachineFunction &MF, RegScavenger *RS = nullptr) const override; diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index 2279afaf89e..73eb3a3b5f6 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -818,6 +818,10 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineFrameInfo &FrameInfo = MF->getFrameInfo(); DebugLoc DL = MBB.findDebugLoc(MI); + assert(SrcReg != MFI->getStackPtrOffsetReg() && + SrcReg != MFI->getFrameOffsetReg() && + SrcReg != MFI->getScratchWaveOffsetReg()); + unsigned Size = FrameInfo.getObjectSize(FrameIndex); unsigned Align = FrameInfo.getObjectAlignment(FrameIndex); MachinePointerInfo PtrInfo diff --git a/test/CodeGen/AMDGPU/byval-frame-setup.ll b/test/CodeGen/AMDGPU/byval-frame-setup.ll index 723c78ad647..bd354682784 100644 --- a/test/CodeGen/AMDGPU/byval-frame-setup.ll +++ b/test/CodeGen/AMDGPU/byval-frame-setup.ll @@ -32,6 +32,7 @@ entry: ; GCN: s_mov_b32 s5, s32 ; GCN-DAG: buffer_store_dword v32 ; GCN-DAG: buffer_store_dword v33 +; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, s32 ; GCN: v_writelane_b32 ; GCN-DAG: s_add_u32 s32, s32, 0xb00{{$}} @@ -48,6 +49,7 @@ entry: ; GCN: buffer_store_dword [[ADD1]], off, s[0:3], s5 offset:20{{$}} ; GCN: v_readlane_b32 +; GCN-NOT: v_readlane_b32 s32 ; GCN: 
buffer_load_dword v32, ; GCN: buffer_load_dword v33, ; GCN: s_sub_u32 s32, s32, 0xb00{{$}} @@ -69,8 +71,8 @@ entry: ; GCN-LABEL: {{^}}call_void_func_byval_struct_func: ; GCN: s_mov_b32 s5, s32 -; GCN: s_add_u32 s32, s32, 0xc00{{$}} -; GCN: v_writelane_b32 +; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}} +; GCN-DAG: v_writelane_b32 ; GCN-DAG: s_add_u32 s32, s32, 0x800{{$}} ; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9 @@ -101,11 +103,12 @@ entry: ; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32 ; GCN: s_swappc_b64 -; GCN-NEXT: s_sub_u32 s32, s32, 0x800{{$}} - +; GCN-NOT: v_readlane_b32 s32 ; GCN: v_readlane_b32 +; GCN-NOT: v_readlane_b32 s32 -; GCN: s_sub_u32 s32, s32, 0xc00{{$}} +; GCN: s_sub_u32 s32, s32, 0x800{{$}} +; GCN-NEXT: s_sub_u32 s32, s32, 0xc00{{$}} ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 define void @call_void_func_byval_struct_func() #0 { diff --git a/test/CodeGen/AMDGPU/sibling-call.ll b/test/CodeGen/AMDGPU/sibling-call.ll index deb59d0866b..d5b56ace32c 100644 --- a/test/CodeGen/AMDGPU/sibling-call.ll +++ b/test/CodeGen/AMDGPU/sibling-call.ll @@ -93,8 +93,10 @@ define fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32* byval align 4 %arg1) ; Tail call disallowed with byval in parent. ; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_byval_i32_byval_parent: +; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, s32 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4 ; GCN: s_swappc_b64 +; GCN-NOT: v_readlane_b32 s32 ; GCN: s_setpc_b64 define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, i32* byval %b.byval, i32 %c) #1 { entry: -- 2.11.0