/// terminator instruction that has not been predicated.
virtual bool isUnpredicatedTerminator(const MachineInstr &MI) const;
+ /// Returns true if MI is an unconditional tail call.
+ virtual bool isUnconditionalTailCall(const MachineInstr &MI) const {
+ return false;
+ }
+
+ /// Returns true if the tail call can be made conditional on BranchCond.
+ virtual bool
+ canMakeTailCallConditional(SmallVectorImpl<MachineOperand> &Cond,
+ const MachineInstr &TailCall) const {
+ return false;
+ }
+
+ /// Replace the conditional branch in MBB with a conditional tail call.
+ virtual void replaceBranchWithTailCall(MachineBasicBlock &MBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ const MachineInstr &TailCall) const {
+ llvm_unreachable("Target didn't implement replaceBranchWithTailCall!");
+ }
+
/// Convert the instruction into a predicated instruction.
/// It returns true if the operation was successful.
virtual bool PredicateInstruction(MachineInstr &MI,
STATISTIC(NumBranchOpts, "Number of branches optimized");
STATISTIC(NumTailMerge , "Number of block tails merged");
STATISTIC(NumHoist , "Number of times common instructions are hoisted");
+STATISTIC(NumTailCalls, "Number of tail calls optimized");
static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
cl::init(cl::BOU_UNSET), cl::Hidden);
}
}
+ if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 &&
+ MF.getFunction()->optForSize()) {
+ // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
+ // direction, thereby defeating careful block placement and regressing
+ // performance. Therefore, only consider this for optsize functions.
+ MachineInstr &TailCall = *MBB->getFirstNonDebugInstr();
+ if (TII->isUnconditionalTailCall(TailCall)) {
+ MachineBasicBlock *Pred = *MBB->pred_begin();
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
+ SmallVector<MachineOperand, 4> PredCond;
+ bool PredAnalyzable =
+ !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);
+
+ if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB) {
+ // The predecessor has a conditional branch to this block which consists
+ // of only a tail call. Try to fold the tail call into the conditional
+ // branch.
+ if (TII->canMakeTailCallConditional(PredCond, TailCall)) {
+ // TODO: It would be nice if analyzeBranch() could provide a pointer
+ // to the branch insturction so replaceBranchWithTailCall() doesn't
+ // have to search for it.
+ TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall);
+ ++NumTailCalls;
+ Pred->removeSuccessor(MBB);
+ MadeChange = true;
+ return MadeChange;
+ }
+ }
+ // If the predecessor is falling through to this block, we could reverse
+ // the branch condition and fold the tail call into that. However, after
+ // that we might have to re-arrange the CFG to fall through to the other
+ // block and there is a high risk of regressing code size rather than
+ // improving it.
+ }
+ }
+
// Analyze the branch in the current block.
MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr;
SmallVector<MachineOperand, 4> CurCond;
default:
return false;
case X86::TCRETURNdi:
+ case X86::TCRETURNdicc:
case X86::TCRETURNri:
case X86::TCRETURNmi:
case X86::TCRETURNdi64:
+ case X86::TCRETURNdi64cc:
case X86::TCRETURNri64:
case X86::TCRETURNmi64: {
bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
Offset = StackAdj - MaxTCDelta;
assert(Offset >= 0 && "Offset should never be negative");
+ if (Opcode == X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64cc) {
+ assert(Offset == 0 && "Conditional tail call cannot adjust the stack.");
+ }
+
if (Offset) {
// Check for possible merge with preceding ADD instruction.
Offset += X86FL->mergeSPUpdates(MBB, MBBI, true);
// Jump to label or value in register.
bool IsWin64 = STI->isTargetWin64();
- if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdi64) {
+ if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc ||
+ Opcode == X86::TCRETURNdi64 || Opcode == X86::TCRETURNdi64cc) {
unsigned Op;
switch (Opcode) {
case X86::TCRETURNdi:
Op = X86::TAILJMPd;
break;
+ case X86::TCRETURNdicc:
+ Op = X86::TAILJMPd_CC;
+ break;
+ case X86::TCRETURNdi64cc:
+ assert(!IsWin64 && "Conditional tail calls confuse the Win64 unwinder.");
+ // TODO: We could do it for Win64 "leaf" functions though; PR30337.
+ Op = X86::TAILJMPd64_CC;
+ break;
default:
// Note: Win64 uses REX prefixes indirect jumps out of functions, but
// not direct ones.
MIB.addExternalSymbol(JumpTarget.getSymbolName(),
JumpTarget.getTargetFlags());
}
+ if (Op == X86::TAILJMPd_CC || Op == X86::TAILJMPd64_CC) {
+ MIB.addImm(MBBI->getOperand(2).getImm());
+ }
+
} else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
unsigned Op = (Opcode == X86::TCRETURNmi)
? X86::TAILJMPm
"jmp{l}\t{*}$dst", [], IIC_JMP_MEM>;
}
+// Conditional tail calls are similar to the above, but they are branches
+// rather than barriers, and they use EFLAGS.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
+ isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
+ let Uses = [ESP, EFLAGS] in {
+ def TCRETURNdicc : PseudoI<(outs),
+ (ins i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>;
+
+ // This gets substituted to a conditional jump instruction in MC lowering.
+ def TAILJMPd_CC : Ii32PCRel<0x80, RawFrm, (outs),
+ (ins i32imm_pcrel:$dst, i32imm:$cond),
+ "",
+ [], IIC_JMP_REL>;
+}
+
//===----------------------------------------------------------------------===//
// Call Instructions...
"rex64 jmp{q}\t{*}$dst", [], IIC_JMP_MEM>;
}
}
+
+// Conditional tail calls are similar to the above, but they are branches
+// rather than barriers, and they use EFLAGS.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
+ isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
+ let Uses = [RSP, EFLAGS] in {
+ def TCRETURNdi64cc : PseudoI<(outs),
+ (ins i64i32imm_pcrel:$dst, i32imm:$offset,
+ i32imm:$cond), []>;
+
+ // This gets substituted to a conditional jump instruction in MC lowering.
+ def TAILJMPd64_CC : Ii32PCRel<0x80, RawFrm, (outs),
+ (ins i64i32imm_pcrel:$dst, i32imm:$cond),
+ "",
+ [], IIC_JMP_REL>;
+}
return !isPredicated(MI);
}
+bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ case X86::TCRETURNdi:
+ case X86::TCRETURNri:
+ case X86::TCRETURNmi:
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool X86InstrInfo::canMakeTailCallConditional(
+ SmallVectorImpl<MachineOperand> &BranchCond,
+ const MachineInstr &TailCall) const {
+ if (TailCall.getOpcode() != X86::TCRETURNdi &&
+ TailCall.getOpcode() != X86::TCRETURNdi64) {
+ // Only direct calls can be done with a conditional branch.
+ return false;
+ }
+
+ if (Subtarget.isTargetWin64()) {
+ // Conditional tail calls confuse the Win64 unwinder.
+ // TODO: Allow them for "leaf" functions; PR30337.
+ return false;
+ }
+
+ assert(BranchCond.size() == 1);
+ if (BranchCond[0].getImm() > X86::LAST_VALID_COND) {
+ // Can't make a conditional tail call with this condition.
+ return false;
+ }
+
+ const X86MachineFunctionInfo *X86FI =
+ TailCall.getParent()->getParent()->getInfo<X86MachineFunctionInfo>();
+ if (X86FI->getTCReturnAddrDelta() != 0 ||
+ TailCall.getOperand(1).getImm() != 0) {
+ // A conditional tail call cannot do any stack adjustment.
+ return false;
+ }
+
+ return true;
+}
+
+void X86InstrInfo::replaceBranchWithTailCall(
+ MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &BranchCond,
+ const MachineInstr &TailCall) const {
+ assert(canMakeTailCallConditional(BranchCond, TailCall));
+
+ MachineBasicBlock::iterator I = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ if (!I->isBranch())
+ assert(0 && "Can't find the branch to replace!");
+
+ X86::CondCode CC = getCondFromBranchOpc(I->getOpcode());
+ assert(BranchCond.size() == 1);
+ if (CC != BranchCond[0].getImm())
+ continue;
+
+ break;
+ }
+
+ unsigned Opc = TailCall.getOpcode() == X86::TCRETURNdi ? X86::TCRETURNdicc
+ : X86::TCRETURNdi64cc;
+
+ auto MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opc));
+ MIB->addOperand(TailCall.getOperand(0)); // Destination.
+ MIB.addImm(0); // Stack offset (not used).
+ MIB->addOperand(BranchCond[0]); // Condition.
+ MIB.copyImplicitOps(TailCall); // Regmask and (imp-used) parameters.
+
+ // Add implicit uses and defs of all live regs potentially clobbered by the
+ // call. This way they still appear live across the call.
+ LivePhysRegs LiveRegs(&getRegisterInfo());
+ LiveRegs.addLiveOuts(MBB);
+ SmallVector<std::pair<unsigned, const MachineOperand *>, 8> Clobbers;
+ LiveRegs.stepForward(*MIB, Clobbers);
+ for (const auto &C : Clobbers) {
+ MIB.addReg(C.first, RegState::Implicit);
+ MIB.addReg(C.first, RegState::Implicit | RegState::Define);
+ }
+
+ I->eraseFromParent();
+}
+
// Given a MBB and its TBB, find the FBB which was a fallthrough MBB (it may
// not be a fallthrough MBB now due to layout changes). Return nullptr if the
// fallthrough MBB cannot be identified.
// Branch analysis.
bool isUnpredicatedTerminator(const MachineInstr &MI) const override;
+ bool isUnconditionalTailCall(const MachineInstr &MI) const override;
+ bool canMakeTailCallConditional(SmallVectorImpl<MachineOperand> &Cond,
+ const MachineInstr &TailCall) const override;
+ void replaceBranchWithTailCall(MachineBasicBlock &MBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ const MachineInstr &TailCall) const override;
+
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
break;
}
- // TAILJMPd, TAILJMPd64 - Lower to the correct jump instruction.
+ // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump instruction.
{ unsigned Opcode;
case X86::TAILJMPr: Opcode = X86::JMP32r; goto SetTailJmpOpcode;
case X86::TAILJMPd:
case X86::TAILJMPd64: Opcode = X86::JMP_1; goto SetTailJmpOpcode;
+ case X86::TAILJMPd_CC:
+ case X86::TAILJMPd64_CC:
+ Opcode = X86::GetCondBranchFromCond(
+ static_cast<X86::CondCode>(MI->getOperand(1).getImm()));
+ goto SetTailJmpOpcode;
SetTailJmpOpcode:
MCOperand Saved = OutMI.getOperand(0);
case X86::TAILJMPr:
case X86::TAILJMPm:
case X86::TAILJMPd:
+ case X86::TAILJMPd_CC:
case X86::TAILJMPr64:
case X86::TAILJMPm64:
case X86::TAILJMPd64:
+ case X86::TAILJMPd64_CC:
case X86::TAILJMPr64_REX:
case X86::TAILJMPm64_REX:
// Lower these as normal, but add some comments.
--- /dev/null
+; RUN: llc < %s -mtriple=i686-linux -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK32
+; RUN: llc < %s -mtriple=x86_64-linux -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK64
+
+declare void @foo()
+declare void @bar()
+
+define void @f(i32 %x, i32 %y) optsize {
+entry:
+ %p = icmp eq i32 %x, %y
+ br i1 %p, label %bb1, label %bb2
+bb1:
+ tail call void @foo()
+ ret void
+bb2:
+ tail call void @bar()
+ ret void
+
+; CHECK-LABEL: f:
+; CHECK: cmp
+; CHECK: jne bar
+; Check that the asm doesn't just look good, but uses the correct encoding.
+; CHECK: encoding: [0x75,A]
+; CHECK: jmp foo
+}
+
+
+declare x86_thiscallcc zeroext i1 @baz(i8*, i32)
+define x86_thiscallcc zeroext i1 @BlockPlacementTest(i8* %this, i32 %x) optsize {
+entry:
+ %and = and i32 %x, 42
+ %tobool = icmp eq i32 %and, 0
+ br i1 %tobool, label %land.end, label %land.rhs
+
+land.rhs:
+ %and6 = and i32 %x, 44
+ %tobool7 = icmp eq i32 %and6, 0
+ br i1 %tobool7, label %lor.rhs, label %land.end
+
+lor.rhs:
+ %call = tail call x86_thiscallcc zeroext i1 @baz(i8* %this, i32 %x) #2
+ br label %land.end
+
+land.end:
+ %0 = phi i1 [ false, %entry ], [ true, %land.rhs ], [ %call, %lor.rhs ]
+ ret i1 %0
+
+; Make sure machine block placement isn't confused by the conditional tail call,
+; but sees that it can fall through to the next block.
+; CHECK-LABEL: BlockPlacementTest
+; CHECK: je baz
+; CHECK-NOT: xor
+; CHECK: ret
+}
+
+
+
+%"class.std::basic_string" = type { %"struct.std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider" }
+%"struct.std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider" = type { i8* }
+declare zeroext i1 @_Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_(i8*, i8*)
+
+define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly dereferenceable(8) %s) minsize {
+; CHECK-LABEL: pr31257
+entry:
+ %_M_p.i.i = getelementptr inbounds %"class.std::basic_string", %"class.std::basic_string"* %s, i64 0, i32 0, i32 0
+ %0 = load i8*, i8** %_M_p.i.i, align 8
+ %arrayidx.i.i.i54 = getelementptr inbounds i8, i8* %0, i64 -24
+ %_M_length.i.i55 = bitcast i8* %arrayidx.i.i.i54 to i64*
+ %1 = load i64, i64* %_M_length.i.i55, align 8
+ %add.ptr.i56 = getelementptr inbounds i8, i8* %0, i64 %1
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %it.sroa.0.0 = phi i8* [ %0, %entry ], [ %incdec.ptr.i, %for.inc ]
+ %state.0 = phi i32 [ 0, %entry ], [ %state.1, %for.inc ]
+ %cmp.i = icmp eq i8* %it.sroa.0.0, %add.ptr.i56
+ br i1 %cmp.i, label %5, label %for.body
+
+for.body: ; preds = %for.cond
+ switch i32 %state.0, label %for.inc [
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb14
+ i32 2, label %sw.bb22
+ ]
+
+sw.bb: ; preds = %for.body
+ %2 = load i8, i8* %it.sroa.0.0, align 1
+ switch i8 %2, label %if.else [
+ i8 43, label %for.inc
+ i8 45, label %for.inc
+ ]
+
+if.else: ; preds = %sw.bb
+ %conv9 = zext i8 %2 to i32
+ %isdigittmp45 = add nsw i32 %conv9, -48
+ %isdigit46 = icmp ult i32 %isdigittmp45, 10
+ br i1 %isdigit46, label %for.inc, label %cleanup.thread.loopexit
+
+sw.bb14: ; preds = %for.body
+ %3 = load i8, i8* %it.sroa.0.0, align 1
+ %conv16 = zext i8 %3 to i32
+ %isdigittmp43 = add nsw i32 %conv16, -48
+ %isdigit44 = icmp ult i32 %isdigittmp43, 10
+ br i1 %isdigit44, label %for.inc, label %cleanup.thread.loopexit
+
+sw.bb22: ; preds = %for.body
+ %4 = load i8, i8* %it.sroa.0.0, align 1
+ %conv24 = zext i8 %4 to i32
+ %isdigittmp = add nsw i32 %conv24, -48
+ %isdigit = icmp ult i32 %isdigittmp, 10
+ br i1 %isdigit, label %for.inc, label %if.else28
+
+; Make sure Machine Copy Propagation doesn't delete the mov to %ecx becaue it
+; thinks the conditional tail call clobbers it.
+; CHECK64-LABEL: .LBB2_11:
+; CHECK64: movzbl (%rdi), %ecx
+; CHECK64-NEXT: addl $-48, %ecx
+; CHECK64-NEXT: cmpl $10, %ecx
+; CHECK64-NEXT: movl %r9d, %ecx
+; CHECK64-NEXT: jae _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEE
+
+if.else28: ; preds = %sw.bb22
+ %call34 = tail call zeroext i1 @_Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_(i8* nonnull %it.sroa.0.0, i8* %add.ptr.i56)
+ br label %cleanup.thread
+
+for.inc: ; preds = %sw.bb, %sw.bb, %sw.bb22, %sw.bb14, %if.else, %for.body
+ %state.1 = phi i32 [ %state.0, %for.body ], [ 1, %sw.bb ], [ 2, %if.else ], [ 2, %sw.bb14 ], [ 2, %sw.bb22 ], [ 1, %sw.bb ]
+ %incdec.ptr.i = getelementptr inbounds i8, i8* %it.sroa.0.0, i64 1
+ br label %for.cond
+
+; <label>:5: ; preds = %for.cond
+ %cmp37 = icmp eq i32 %state.0, 2
+ br label %cleanup.thread
+
+cleanup.thread.loopexit: ; preds = %if.else, %sw.bb14
+ br label %cleanup.thread
+
+cleanup.thread: ; preds = %cleanup.thread.loopexit, %if.else28, %5
+ %6 = phi i1 [ %cmp37, %5 ], [ %call34, %if.else28 ], [ false, %cleanup.thread.loopexit ]
+ ret i1 %6
+}
; CHECK-LABEL: test2_1:
; CHECK: movzbl
; CHECK: cmpl $256
-; CHECK: jne .LBB
-; CHECK: jmp bar
+; CHECK: je bar
define void @test2_1(i32 %X) nounwind minsize {
entry:
%and = and i32 %X, 255
; CHECK-LABEL: test_sext_i8_icmp_255:
; CHECK: movb $1,
; CHECK: testb
-; CHECK: jne .LBB
-; CHECK: jmp bar
+; CHECK: je bar
define void @test_sext_i8_icmp_255(i8 %x) nounwind minsize {
entry:
%sext = sext i8 %x to i32
--- /dev/null
+# RUN: llc -mtriple x86_64-- -verify-machineinstrs -run-pass branch-folder -o - %s | FileCheck %s
+
+# Check the TCRETURNdi64cc optimization.
+
+--- |
+ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+ define i64 @test(i64 %arg, i8* %arg1) optsize {
+ %tmp = icmp ult i64 %arg, 100
+ br i1 %tmp, label %1, label %4
+
+ %tmp3 = icmp ult i64 %arg, 10
+ br i1 %tmp3, label %2, label %3
+
+ %tmp5 = tail call i64 @f1(i8* %arg1, i64 %arg)
+ ret i64 %tmp5
+
+ %tmp7 = tail call i64 @f2(i8* %arg1, i64 %arg)
+ ret i64 %tmp7
+
+ ret i64 123
+ }
+
+ declare i64 @f1(i8*, i64)
+ declare i64 @f2(i8*, i64)
+
+...
+---
+name: test
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+ - { reg: '%rsi' }
+body: |
+ bb.0:
+ successors: %bb.1, %bb.4
+ liveins: %rdi, %rsi
+
+ %rax = COPY %rdi
+ CMP64ri8 %rax, 99, implicit-def %eflags
+ JA_1 %bb.4, implicit %eflags
+ JMP_1 %bb.1
+
+ ; CHECK: bb.1:
+ ; CHECK-NEXT: successors: %bb.2({{[^)]+}}){{$}}
+ ; CHECK-NEXT: liveins: %rax, %rsi
+ ; CHECK-NEXT: {{^ $}}
+ ; CHECK-NEXT: %rdi = COPY %rsi
+ ; CHECK-NEXT: %rsi = COPY %rax
+ ; CHECK-NEXT: CMP64ri8 %rax, 9, implicit-def %eflags
+ ; CHECK-NEXT: TCRETURNdi64cc @f1, 0, 3, csr_64, implicit %rsp, implicit %eflags, implicit %rsp, implicit %rdi, implicit %rsi, implicit %rax, implicit-def %rax, implicit %sil, implicit-def %sil, implicit %si, implicit-def %si, implicit %esi, implicit-def %esi, implicit %rsi, implicit-def %rsi, implicit %dil, implicit-def %dil, implicit %di, implicit-def %di, implicit %edi, implicit-def %edi, implicit %rdi, implicit-def %rdi, implicit %ah, implicit-def %ah, implicit %al, implicit-def %al, implicit %ax, implicit-def %ax, implicit %eax, implicit-def %eax
+
+ bb.1:
+ successors: %bb.2, %bb.3
+ liveins: %rax, %rsi
+
+ CMP64ri8 %rax, 9, implicit-def %eflags
+ JA_1 %bb.3, implicit %eflags
+ JMP_1 %bb.2
+
+ bb.2:
+ liveins: %rax, %rsi
+
+ %rdi = COPY %rsi
+ %rsi = COPY %rax
+
+ TCRETURNdi64 @f1, 0, csr_64, implicit %rsp, implicit %rdi, implicit %rsi
+
+ ; CHECK: bb.2:
+ ; CHECK-NEXT: liveins: %rax, %rdi, %rsi
+ ; CHECK-NEXT: {{^ $}}
+ ; CHECK-NEXT: TCRETURNdi64 @f2, 0, csr_64, implicit %rsp, implicit %rdi, implicit %rsi
+
+ bb.3:
+ liveins: %rax, %rsi
+
+ %rdi = COPY %rsi
+ %rsi = COPY %rax
+ TCRETURNdi64 @f2, 0, csr_64, implicit %rsp, implicit %rdi, implicit %rsi
+
+ bb.4:
+ dead %eax = MOV32ri64 123, implicit-def %rax
+ RET 0, %rax
+
+...