// baseline, since the LDM/POP instruction on Thumb doesn't take LR. This
// means if we need to reload LR, it takes extra instructions, which outweighs
// the value of the tail call; but here we don't know yet whether LR is going
- // to be used. We generate the tail call here and turn it back into CALL/RET
- // in emitEpilogue if LR is used.
+ // to be used. We take the optimistic approach of generating the tail call and
+ // perhaps taking a hit if we need to restore the LR.
// Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
// but we need to make sure there are enough registers; the only valid
return false;
}
+static void findTemporariesForLR(const BitVector &GPRsNoLRSP,
+ const BitVector &PopFriendly,
+ const LivePhysRegs &UsedRegs, unsigned &PopReg,
+ unsigned &TmpReg) {
+ PopReg = TmpReg = 0;
+ for (auto Reg : GPRsNoLRSP.set_bits()) {
+ if (!UsedRegs.contains(Reg)) {
+ // Remember the first pop-friendly register and exit.
+ if (PopFriendly.test(Reg)) {
+ PopReg = Reg;
+ TmpReg = 0;
+ break;
+ }
+ // Otherwise, remember that the register will be available to
+ // save a pop-friendly register.
+ TmpReg = Reg;
+ }
+ }
+}
+
bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
bool DoIt) const {
MachineFunction &MF = *MBB.getParent();
GPRsNoLRSP.reset(ARM::LR);
GPRsNoLRSP.reset(ARM::SP);
GPRsNoLRSP.reset(ARM::PC);
- for (unsigned Register : GPRsNoLRSP.set_bits()) {
- if (!UsedRegs.contains(Register)) {
- // Remember the first pop-friendly register and exit.
- if (PopFriendly.test(Register)) {
- PopReg = Register;
- TemporaryReg = 0;
- break;
- }
- // Otherwise, remember that the register will be available to
- // save a pop-friendly register.
- TemporaryReg = Register;
+ findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg);
+
+ // If we couldn't find a pop-friendly register, restore LR before popping the
+ // other callee-saved registers, so we can use one of them as a temporary.
+ bool UseLDRSP = false;
+ if (!PopReg && MBBI != MBB.begin()) {
+ auto PrevMBBI = MBBI;
+ PrevMBBI--;
+ if (PrevMBBI->getOpcode() == ARM::tPOP) {
+ MBBI = PrevMBBI;
+ UsedRegs.stepBackward(*MBBI);
+ findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg);
+ UseLDRSP = true;
}
}
assert((PopReg || TemporaryReg) && "Cannot get LR");
+ if (UseLDRSP) {
+ assert(PopReg && "Do not know how to get LR");
+ // Load the LR via LDR tmp, [SP, #off]
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRspi))
+ .addReg(PopReg, RegState::Define)
+ .addReg(ARM::SP)
+ .addImm(MBBI->getNumOperands() - 3)
+ .add(predOps(ARMCC::AL));
+ // Move from the temporary register to the LR.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(ARM::LR, RegState::Define)
+ .addReg(PopReg, RegState::Kill)
+ .add(predOps(ARMCC::AL));
+ // Advance past the pop instruction.
+ MBBI++;
+ // Increment the SP.
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize + 4);
+ return true;
+ }
+
if (TemporaryReg) {
assert(!PopReg && "Unnecessary MOV is about to be inserted");
PopReg = PopFriendly.find_first();
if (Reg == ARM::LR) {
Info.setRestored(false);
- if (MBB.succ_empty()) {
- // Special epilogue for vararg functions. See emitEpilogue
- if (isVarArg)
- continue;
- // ARMv4T requires BX, see emitEpilogue
- if (!STI.hasV5TOps())
- continue;
- // Tailcall optimization failed; change TCRETURN to a tBL
- if (MI->getOpcode() == ARM::TCRETURNdi ||
- MI->getOpcode() == ARM::TCRETURNri) {
- unsigned Opcode = MI->getOpcode() == ARM::TCRETURNdi
- ? ARM::tBL : ARM::tBLXr;
- MachineInstrBuilder BL = BuildMI(MF, DL, TII.get(Opcode));
- BL.add(predOps(ARMCC::AL));
- BL.add(MI->getOperand(0));
- MBB.insert(MI, &*BL);
- }
- Reg = ARM::PC;
- (*MIB).setDesc(TII.get(ARM::tPOP_RET));
- if (MI != MBB.end())
- MIB.copyImplicitOps(*MI);
- MI = MBB.erase(MI);
- } else
+ if (!MBB.succ_empty() ||
+ MI->getOpcode() == ARM::TCRETURNdi ||
+ MI->getOpcode() == ARM::TCRETURNri)
// LR may only be popped into PC, as part of return sequence.
// If this isn't the return sequence, we'll need emitPopSpecialFixUp
// to restore LR the hard way.
+ // FIXME: if we don't pass any stack arguments it would be actually
+ // advantageous *and* correct to do the conversion to an ordinary call
+ // instruction here.
continue;
+ // Special epilogue for vararg functions. See emitEpilogue
+ if (isVarArg)
+ continue;
+ // ARMv4T requires BX, see emitEpilogue
+ if (!STI.hasV5TOps())
+ continue;
+
+ // Pop LR into PC.
+ Reg = ARM::PC;
+ (*MIB).setDesc(TII.get(ARM::tPOP_RET));
+ if (MI != MBB.end())
+ MIB.copyImplicitOps(*MI);
+ MI = MBB.erase(MI);
}
MIB.addReg(Reg, getDefRegState(true));
NeedsPop = true;
; --------
; Stack realignment means sp is restored from frame pointer
; CHECK-V4T: mov sp
+; CHECK-V4T-NEXT: ldr [[POP:r[4567]]], [sp, #{{.*}}]
+; CHECK-V4T-NEXT: mov lr, [[POP]]
; CHECK-V4T-NEXT: pop {[[SAVED]]}
+; CHECK-V4T-NEXT add sp, sp, #4
; The ISA for v4 does not support pop pc, so make sure we do not emit
; one even when we do not need to update SP.
; CHECK-V4T-NOT: pop {pc}
-; We may only use lo register to pop, but in that case, all the scratch
-; ones are used.
-; r12 is the only register we are allowed to clobber for AAPCS.
-; Use it to save a lo register.
-; CHECK-V4T-NEXT: mov [[TEMP_REG:r12]], [[POP_REG:r[0-7]]]
-; Pop the value of LR.
-; CHECK-V4T-NEXT: pop {[[POP_REG]]}
-; Copy the value of LR in the right register.
-; CHECK-V4T-NEXT: mov lr, [[POP_REG]]
-; Restore the value that was in the register we used to pop the value of LR.
-; CHECK-V4T-NEXT: mov [[POP_REG]], [[TEMP_REG]]
-; Return.
-; CHECK-V4T-NEXT: bx lr
+; CHECK-V4T: bx lr
; CHECK-V5T: pop {[[SAVED]], pc}
}
; Epilogue
; --------
-; CHECK-V4T: pop {[[SAVED]]}
-; CHECK-V4T-NEXT: mov r12, [[POP_REG:r[0-7]]]
-; CHECK-V4T-NEXT: pop {[[POP_REG]]}
-; CHECK-V4T-NEXT: add sp,
-; CHECK-V4T-NEXT: mov lr, [[POP_REG]]
-; CHECK-V4T-NEXT: mov [[POP_REG]], r12
-; CHECK-V4T: bx lr
+; CHECK-V4T: ldr [[POP:r[4567]]], [sp, #{{.*}}]
+; CHECK-V4T-NEXT: mov lr, [[POP]]
+; CHECK-V4T-NEXT: pop {[[SAVED]]}
+; CHECK-V4T-NEXT: add sp, #16
+; CHECK-V4T-NEXT: bx lr
; CHECK-V5T: lsls r4
; CHECK-V5T-NEXT: mov sp, r4
-; CHECK-V5T: pop {[[SAVED]]}
-; CHECK-V5T-NEXT: mov r12, [[POP_REG:r[0-7]]]
-; CHECK-V5T-NEXT: pop {[[POP_REG]]}
-; CHECK-V5T-NEXT: add sp,
-; CHECK-V5T-NEXT: mov lr, [[POP_REG]]
-; CHECK-V5T-NEXT: mov [[POP_REG]], r12
-; CHECK-V5T-NEXT: bx lr
+; CHECK-V5T: ldr [[POP:r[4567]]], [sp, #{{.*}}]
+; CHECK-V5T-NEXT: mov lr, [[POP]]
+; CHECK-V5T-NEXT: pop {[[SAVED]]}
+; CHECK-V5T-NEXT: add sp, #16
+; CHECK-V5T-NEXT: bx lr
}
; CHECK-V4T-LABEL: simpleframe
; RUN: llc %s -o - -mtriple=thumbv8m.base | FileCheck %s
-define void @test() {
-; CHECK-LABEL: test:
-entry:
- %call = tail call i32 @foo()
- %tail = tail call i32 @foo()
- ret void
-; CHECK: bl foo
-; CHECK: bl foo
-; CHECK-NOT: b foo
+declare i32 @g(...)
+
+declare i32 @h0(i32, i32, i32, i32)
+define hidden i32 @f0() {
+ %1 = tail call i32 bitcast (i32 (...)* @g to i32 ()*)()
+ %2 = tail call i32 @h0(i32 %1, i32 1, i32 2, i32 3)
+ ret i32 %2
+; CHECK-LABEL: f0
+; CHECK: ldr [[POP:r[4567]]], [sp
+; CHECK-NEXT: mov lr, [[POP]]
+; CHECK-NEXT: pop {{.*}}[[POP]]
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: b h0
}
-define void @test2() {
-; CHECK-LABEL: test2:
-entry:
- %tail = tail call i32 @foo()
- ret void
-; CHECK: b foo
-; CHECK-NOT: bl foo
+declare i32 @h1(i32)
+define hidden i32 @f1() {
+ %1 = tail call i32 bitcast (i32 (...)* @g to i32 ()*)()
+ %2 = tail call i32 @h1(i32 %1)
+ ret i32 %2
+; CHECK-LABEL: f1
+; CHECK: pop {r7}
+; CHECK: pop {r1}
+; CHECK: mov lr, r1
+; CHECK: b h1
}
-declare i32 @foo()
+declare i32 @h2(i32, i32, i32, i32, i32)
+define hidden i32 @f2(i32, i32, i32, i32, i32) {
+ %6 = tail call i32 bitcast (i32 (...)* @g to i32 ()*)()
+ %7 = icmp eq i32 %6, 0
+ br i1 %7, label %10, label %8
+
+ %9 = tail call i32 @h2(i32 %6, i32 %1, i32 %2, i32 %3, i32 %4)
+ br label %10
+
+ %11 = phi i32 [ %9, %8 ], [ -1, %5 ]
+ ret i32 %11
+; CHECK-LABEL: f2
+; CHECK: ldr [[POP:r[4567]]], [sp
+; CHECK-NEXT: mov lr, [[POP]]
+; CHECK-NEXT: pop {{.*}}[[POP]]
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: b h2
+}
; ENABLE: push {r4, lr}
; CHECK: tst r3, r4
-; ENABLE-NEXT: pop {r4}
-; ENABLE-NEXT: mov r12, r{{.*}}
-; ENABLE-NEXT: pop {r0}
-; ENABLE-NEXT: mov lr, r0
-; ENABLE-NEXT: mov r0, r12
+; ENABLE-NEXT: ldr [[POP:r[4567]]], [sp, #8]
+; ENABLE-NEXT: mov lr, [[POP]]
+; ENABLE-NEXT: pop {[[POP]]}
+; ENABLE-NEXT: add sp, #4
; CHECK-NEXT: beq [[EXIT_LABEL]]
; CHECK: str r1, [r2]