From 5d8c984e5463a26de279b4a0d341a7472fdc7f83 Mon Sep 17 00:00:00 2001 From: Pavel Chupin Date: Thu, 7 Aug 2014 09:41:19 +0000 Subject: [PATCH] [x32] Use ebp/esp as frame and stack pointer Summary: Since pointers are 32-bit on x32 we can use ebp and esp as frame and stack pointer. Some operations like PUSH/POP and CFI_INSTRUCTION still require 64-bit register, so using 64-bit MachineFramePtr where required. X86_64 NaCl uses 64-bit frame/stack pointers, however it's been found that both isTarget64BitLP64 and isTarget64BitILP32 are true for NaCl. Addressing this issue here as well by making isTarget64BitLP64 false. Also mark hasReservedSpillSlot unreachable on X86. See inlined comments. Test Plan: Add one new simple test and upgrade 2 existing with x32 target case. Reviewers: nadav, dschuff Subscribers: llvm-commits, zinovy.nis Differential Revision: http://reviews.llvm.org/D4617 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@215091 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 63 ++++++++++++++------------ lib/Target/X86/X86ISelLowering.cpp | 3 +- lib/Target/X86/X86RegisterInfo.cpp | 16 +++---- lib/Target/X86/X86Subtarget.h | 3 +- test/CodeGen/X86/alloca-align-rounding.ll | 19 +++++++- test/CodeGen/X86/frameaddr.ll | 15 ++++++ test/CodeGen/X86/x86-64-stack-and-frame-ptr.ll | 34 ++++++++++++++ 7 files changed, 111 insertions(+), 42 deletions(-) create mode 100644 test/CodeGen/X86/x86-64-stack-and-frame-ptr.ll diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index f17e5a34e50..d9d23f83042 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -148,32 +148,32 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, static void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, unsigned StackPtr, int64_t NumBytes, - bool Is64Bit, bool IsLP64, bool UseLEA, + bool Is64BitTarget, bool Is64BitStackPtr, bool UseLEA, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) { bool isSub = NumBytes < 0; uint64_t Offset = isSub ? -NumBytes : NumBytes; unsigned Opc; if (UseLEA) - Opc = getLEArOpcode(IsLP64); + Opc = getLEArOpcode(Is64BitStackPtr); else Opc = isSub - ? getSUBriOpcode(IsLP64, Offset) - : getADDriOpcode(IsLP64, Offset); + ? getSUBriOpcode(Is64BitStackPtr, Offset) + : getADDriOpcode(Is64BitStackPtr, Offset); uint64_t Chunk = (1LL << 31) - 1; DebugLoc DL = MBB.findDebugLoc(MBBI); while (Offset) { uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset; - if (ThisVal == (Is64Bit ? 8 : 4)) { + if (ThisVal == (Is64BitTarget ? 8 : 4)) { // Use push / pop instead. unsigned Reg = isSub - ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX) - : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit); + ? (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX) + : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget); if (Reg) { Opc = isSub - ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r) - : (Is64Bit ? X86::POP64r : X86::POP32r); + ? (Is64BitTarget ? X86::PUSH64r : X86::PUSH32r) + : (Is64BitTarget ? X86::POP64r : X86::POP32r); MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc)) .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub)); if (isSub) @@ -449,7 +449,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { bool HasFP = hasFP(MF); const X86Subtarget &STI = MF.getTarget().getSubtarget(); bool Is64Bit = STI.is64Bit(); - bool IsLP64 = STI.isTarget64BitLP64(); + // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. + const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64(); bool IsWin64 = STI.isTargetWin64(); bool IsWinEH = MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() == @@ -461,6 +462,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned StackAlign = getStackAlignment(); unsigned SlotSize = RegInfo->getSlotSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); + const unsigned MachineFramePtr = STI.isTarget64BitILP32() ? + getX86SubSuperRegister(FramePtr, MVT::i64, false) : FramePtr; unsigned StackPtr = RegInfo->getStackRegister(); unsigned BasePtr = RegInfo->getBaseRegister(); DebugLoc DL; @@ -507,7 +510,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { if (TailCallReturnAddrDelta < 0) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, - TII.get(getSUBriOpcode(IsLP64, -TailCallReturnAddrDelta)), + TII.get(getSUBriOpcode(Uses64BitFramePtr, -TailCallReturnAddrDelta)), StackPtr) .addReg(StackPtr) .addImm(-TailCallReturnAddrDelta) @@ -551,7 +554,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // Save EBP/RBP into the appropriate stack slot. BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) - .addReg(FramePtr, RegState::Kill) + .addReg(MachineFramePtr, RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); if (NeedsDwarfCFI) { @@ -564,7 +567,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { .addCFIIndex(CFIIndex); // Change the rule for the FramePtr to be an "offset" rule. - unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true); + unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true); CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, DwarfFramePtr, 2 * stackGrowth)); @@ -580,14 +583,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // Update EBP with the new base value. BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr) + TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), FramePtr) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); if (NeedsDwarfCFI) { // Mark effective beginning of when frame pointer becomes valid. // Define the current CFA to use the EBP/RBP register. - unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true); + unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true); unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr)); BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) @@ -596,7 +599,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // Mark the FramePtr as live-in in every block. for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - I->addLiveIn(FramePtr); + I->addLiveIn(MachineFramePtr); } else { NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); } @@ -635,7 +638,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { assert(HasFP && "There should be a frame pointer if stack is realigned."); MachineInstr *MI = BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr) + TII.get(Uses64BitFramePtr ? X86::AND64ri32 : X86::AND32ri), StackPtr) .addReg(StackPtr) .addImm(-MaxAlign) .setMIFlag(MachineInstr::FrameSetup); @@ -730,7 +733,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { MBB.insert(MBBI, MI); } } else if (NumBytes) { - emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64, + emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, Uses64BitFramePtr, UseLEA, TII, *RegInfo); } @@ -804,7 +807,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // to reference locals. if (RegInfo->hasBasePointer(MF)) { // Update the base pointer with the current stack pointer. - unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr; + unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr; BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); @@ -842,11 +845,15 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, DebugLoc DL = MBBI->getDebugLoc(); const X86Subtarget &STI = MF.getTarget().getSubtarget(); bool Is64Bit = STI.is64Bit(); - bool IsLP64 = STI.isTarget64BitLP64(); + // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. + const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64(); + const bool Is64BitILP32 = STI.isTarget64BitILP32(); bool UseLEA = STI.useLeaForSP(); unsigned StackAlign = getStackAlignment(); unsigned SlotSize = RegInfo->getSlotSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); + unsigned MachineFramePtr {Is64BitILP32 ? + getX86SubSuperRegister(FramePtr, MVT::i64, false) : FramePtr}; unsigned StackPtr = RegInfo->getStackRegister(); bool IsWinEH = @@ -903,7 +910,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, // Pop EBP. BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr); + TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr); } else { NumBytes = StackSize - CSSize; } @@ -935,19 +942,19 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, if (RegInfo->needsStackRealignment(MF)) MBBI = FirstCSPop; if (CSSize != 0) { - unsigned Opc = getLEArOpcode(IsLP64); + unsigned Opc = getLEArOpcode(Uses64BitFramePtr); addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), FramePtr, false, -CSSize); --MBBI; } else { - unsigned Opc = (Is64Bit ? X86::MOV64rr : X86::MOV32rr); + unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr); BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) .addReg(FramePtr); --MBBI; } } else if (NumBytes) { // Adjust stack pointer back: ESP += numbytes. - emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, IsLP64, UseLEA, + emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, Uses64BitFramePtr, UseLEA, TII, *RegInfo); --MBBI; } @@ -967,7 +974,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, MachineOperand &DestAddr = MBBI->getOperand(0); assert(DestAddr.isReg() && "Offset should be in register!"); BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), + TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), StackPtr).addReg(DestAddr.getReg()); } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNmi || @@ -993,7 +1000,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, if (Offset) { // Check for possible merge with preceding ADD instruction. Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true); - emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, IsLP64, + emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, Uses64BitFramePtr, UseLEA, TII, *RegInfo); } @@ -1038,7 +1045,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, // Check for possible merge with preceding ADD instruction. delta += mergeSPUpdates(MBB, MBBI, StackPtr, true); - emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, IsLP64, UseLEA, TII, + emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, Uses64BitFramePtr, UseLEA, TII, *RegInfo); } } @@ -1124,7 +1131,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots( // about avoiding it later. unsigned FPReg = RegInfo->getFrameRegister(MF); for (unsigned i = 0; i < CSI.size(); ++i) { - if (CSI[i].getReg() == FPReg) { + if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) { CSI.erase(CSI.begin() + i); break; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 38cb996b8d7..c4bcc046967 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -659,8 +659,7 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? - MVT::i64 : MVT::i32, Custom); + setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(), Custom); if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) { // f32 and f64 use SSE. diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 5b19b1237cc..97f86e2760d 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -68,8 +68,10 @@ X86RegisterInfo::X86RegisterInfo(const X86Subtarget &STI) if (Is64Bit) { SlotSize = 8; - StackPtr = X86::RSP; - FramePtr = X86::RBP; + StackPtr = (Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64()) ? + X86::RSP : X86::ESP; + FramePtr = (Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64()) ? + X86::RBP : X86::EBP; } else { SlotSize = 4; StackPtr = X86::ESP; @@ -459,13 +461,9 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); - - if (Reg == FramePtr && TFI->hasFP(MF)) { - FrameIdx = MF.getFrameInfo()->getObjectIndexBegin(); - return true; - } - return false; + // Since X86 defines assignCalleeSavedSpillSlots which always return true + // this function neither used nor tested. + llvm_unreachable("Unused function on X86. Otherwise need a test case."); } void diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 566f25a24b0..75e8ae5dc2b 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -312,7 +312,8 @@ public: /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)? bool isTarget64BitLP64() const { - return In64BitMode && (TargetTriple.getEnvironment() != Triple::GNUX32); + return In64BitMode && (TargetTriple.getEnvironment() != Triple::GNUX32 && + TargetTriple.getOS() != Triple::NaCl); } PICStyles::Style getPICStyle() const { return PICStyle; } diff --git a/test/CodeGen/X86/alloca-align-rounding.ll b/test/CodeGen/X86/alloca-align-rounding.ll index 74b9470db75..9d8b6cfa673 100644 --- a/test/CodeGen/X86/alloca-align-rounding.ll +++ b/test/CodeGen/X86/alloca-align-rounding.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux -enable-misched=false | FileCheck %s +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-linux-gnux32 -enable-misched=false | FileCheck %s -check-prefix=X32ABI declare void @bar(<2 x i64>* %n) @@ -6,15 +7,29 @@ define void @foo(i64 %h) { %p = alloca <2 x i64>, i64 %h call void @bar(<2 x i64>* %p) ret void -; CHECK: foo +; CHECK-LABEL: foo ; CHECK-NOT: andq $-32, %rax +; X32ABI-LABEL: foo +; X32ABI-NOT: andl $-32, %eax } define void @foo2(i64 %h) { %p = alloca <2 x i64>, i64 %h, align 32 call void @bar(<2 x i64>* %p) ret void -; CHECK: foo2 +; CHECK-LABEL: foo2 ; CHECK: andq $-32, %rsp ; CHECK: andq $-32, %rax +; X32ABI-LABEL: foo2 +; X32ABI: andl $-32, %esp +; X32ABI: andl $-32, %eax +} + +define void @foo3(i64 %h) { + %p = alloca <2 x i64>, i64 %h + ret void +; CHECK-LABEL: foo3 +; CHECK: movq %rbp, %rsp +; X32ABI-LABEL: foo3 +; X32ABI: movl %ebp, %esp } diff --git a/test/CodeGen/X86/frameaddr.ll b/test/CodeGen/X86/frameaddr.ll index 6c1ca252bb9..452c8e5a21a 100644 --- a/test/CodeGen/X86/frameaddr.ll +++ b/test/CodeGen/X86/frameaddr.ll @@ -2,6 +2,8 @@ ; RUN: llc < %s -march=x86 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-32 ; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64 ; RUN: llc < %s -march=x86-64 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-64 +; RUN: llc < %s -mtriple=x86_64-gnux32 | FileCheck %s --check-prefix=CHECK-X32ABI +; RUN: llc < %s -mtriple=x86_64-gnux32 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-X32ABI define i8* @test1() nounwind { entry: @@ -17,6 +19,12 @@ entry: ; CHECK-64-NEXT: movq %rbp, %rax ; CHECK-64-NEXT: pop ; CHECK-64-NEXT: ret +; CHECK-X32ABI-LABEL: test1 +; CHECK-X32ABI: pushq %rbp +; CHECK-X32ABI-NEXT: movl %esp, %ebp +; CHECK-X32ABI-NEXT: movl %ebp, %eax +; CHECK-X32ABI-NEXT: popq %rbp +; CHECK-X32ABI-NEXT: ret %0 = tail call i8* @llvm.frameaddress(i32 0) ret i8* %0 } @@ -37,6 +45,13 @@ entry: ; CHECK-64-NEXT: movq (%rax), %rax ; CHECK-64-NEXT: pop ; CHECK-64-NEXT: ret +; CHECK-X32ABI-LABEL: test2 +; CHECK-X32ABI: pushq %rbp +; CHECK-X32ABI-NEXT: movl %esp, %ebp +; CHECK-X32ABI-NEXT: movl (%ebp), %eax +; CHECK-X32ABI-NEXT: movl (%eax), %eax +; CHECK-X32ABI-NEXT: popq %rbp +; CHECK-X32ABI-NEXT: ret %0 = tail call i8* @llvm.frameaddress(i32 2) ret i8* %0 } diff --git a/test/CodeGen/X86/x86-64-stack-and-frame-ptr.ll b/test/CodeGen/X86/x86-64-stack-and-frame-ptr.ll new file mode 100644 index 00000000000..c476ffd8405 --- /dev/null +++ b/test/CodeGen/X86/x86-64-stack-and-frame-ptr.ll @@ -0,0 +1,34 @@ +; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-pc-linux-gnux32 < %s | FileCheck -check-prefix=X32ABI %s +; RUN: llc -mtriple=x86_64-pc-nacl < %s | FileCheck -check-prefix=NACL %s + +; x32 uses %esp, %ebp as stack and frame pointers + +; CHECK-LABEL: foo +; CHECK: pushq %rbp +; CHECK: movq %rsp, %rbp +; CHECK: movq %rdi, -8(%rbp) +; CHECK: popq %rbp +; X32ABI-LABEL: foo +; X32ABI: pushq %rbp +; X32ABI: movl %esp, %ebp +; X32ABI: movl %edi, -4(%ebp) +; X32ABI: popq %rbp +; NACL-LABEL: foo +; NACL: pushq %rbp +; NACL: movq %rsp, %rbp +; NACL: movl %edi, -4(%rbp) +; NACL: popq %rbp + + +define void @foo(i32* %a) #0 { +entry: + %a.addr = alloca i32*, align 4 + %b = alloca i32*, align 4 + store i32* %a, i32** %a.addr, align 4 + ret void +} + +attributes #0 = { nounwind uwtable "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"} + + -- 2.11.0