+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-linux -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK32
; RUN: llc < %s -mtriple=x86_64-linux -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK64
; RUN: llc < %s -mtriple=x86_64-win32 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=WIN64
declare void @bar()
define void @f(i32 %x, i32 %y) optsize {
+; CHECK32-LABEL: f:
+; CHECK32: # %bb.0: # %entry
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; CHECK32-NEXT: cmpl {{[0-9]+}}(%esp), %eax # encoding: [0x3b,0x44,0x24,0x08]
+; CHECK32-NEXT: jne bar # TAILCALL
+; CHECK32-NEXT: # encoding: [0x75,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.1: # %bb1
+; CHECK32-NEXT: jmp foo # TAILCALL
+; CHECK32-NEXT: # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
+;
+; CHECK64-LABEL: f:
+; CHECK64: # %bb.0: # %entry
+; CHECK64-NEXT: cmpl %esi, %edi # encoding: [0x39,0xf7]
+; CHECK64-NEXT: jne bar # TAILCALL
+; CHECK64-NEXT: # encoding: [0x75,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.1: # %bb1
+; CHECK64-NEXT: jmp foo # TAILCALL
+; CHECK64-NEXT: # encoding: [0xeb,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
+;
+; WIN64-LABEL: f:
+; WIN64: # %bb.0: # %entry
+; WIN64-NEXT: cmpl %edx, %ecx # encoding: [0x39,0xd1]
+; WIN64-NEXT: jne bar # TAILCALL
+; WIN64-NEXT: # encoding: [0x75,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.1: # %bb1
+; WIN64-NEXT: jmp foo # TAILCALL
+; WIN64-NEXT: # encoding: [0xeb,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
entry:
%p = icmp eq i32 %x, %y
br i1 %p, label %bb1, label %bb2
tail call void @bar()
ret void
-; CHECK-LABEL: f:
-; CHECK: cmp
-; CHECK: jne bar
; Check that the asm doesn't just look good, but uses the correct encoding.
-; CHECK: encoding: [0x75,A]
-; CHECK: jmp foo
}
define void @f_non_leaf(i32 %x, i32 %y) optsize {
+; CHECK32-LABEL: f_non_leaf:
+; CHECK32: # %bb.0: # %entry
+; CHECK32-NEXT: pushl %ebx # encoding: [0x53]
+; CHECK32-NEXT: .cfi_def_cfa_offset 8
+; CHECK32-NEXT: .cfi_offset %ebx, -8
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; CHECK32-NEXT: #APP
+; CHECK32-NEXT: #NO_APP
+; CHECK32-NEXT: cmpl {{[0-9]+}}(%esp), %eax # encoding: [0x3b,0x44,0x24,0x0c]
+; CHECK32-NEXT: jne .LBB1_2 # encoding: [0x75,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.1: # %bb1
+; CHECK32-NEXT: popl %ebx # encoding: [0x5b]
+; CHECK32-NEXT: .cfi_def_cfa_offset 4
+; CHECK32-NEXT: jmp foo # TAILCALL
+; CHECK32-NEXT: # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
+; CHECK32-NEXT: .LBB1_2: # %bb2
+; CHECK32-NEXT: .cfi_def_cfa_offset 8
+; CHECK32-NEXT: popl %ebx # encoding: [0x5b]
+; CHECK32-NEXT: .cfi_def_cfa_offset 4
+; CHECK32-NEXT: jmp bar # TAILCALL
+; CHECK32-NEXT: # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
+;
+; CHECK64-LABEL: f_non_leaf:
+; CHECK64: # %bb.0: # %entry
+; CHECK64-NEXT: pushq %rbx # encoding: [0x53]
+; CHECK64-NEXT: .cfi_def_cfa_offset 16
+; CHECK64-NEXT: .cfi_offset %rbx, -16
+; CHECK64-NEXT: #APP
+; CHECK64-NEXT: #NO_APP
+; CHECK64-NEXT: cmpl %esi, %edi # encoding: [0x39,0xf7]
+; CHECK64-NEXT: jne .LBB1_2 # encoding: [0x75,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.1: # %bb1
+; CHECK64-NEXT: popq %rbx # encoding: [0x5b]
+; CHECK64-NEXT: .cfi_def_cfa_offset 8
+; CHECK64-NEXT: jmp foo # TAILCALL
+; CHECK64-NEXT: # encoding: [0xeb,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
+; CHECK64-NEXT: .LBB1_2: # %bb2
+; CHECK64-NEXT: .cfi_def_cfa_offset 16
+; CHECK64-NEXT: popq %rbx # encoding: [0x5b]
+; CHECK64-NEXT: .cfi_def_cfa_offset 8
+; CHECK64-NEXT: jmp bar # TAILCALL
+; CHECK64-NEXT: # encoding: [0xeb,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
+;
+; WIN64-LABEL: f_non_leaf:
+; WIN64: # %bb.0: # %entry
+; WIN64-NEXT: pushq %rbx # encoding: [0x53]
+; WIN64-NEXT: .seh_pushreg 3
+; WIN64-NEXT: .seh_endprologue
+; WIN64-NEXT: #APP
+; WIN64-NEXT: #NO_APP
+; WIN64-NEXT: cmpl %edx, %ecx # encoding: [0x39,0xd1]
+; WIN64-NEXT: jne .LBB1_2 # encoding: [0x75,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.1: # %bb1
+; WIN64-NEXT: popq %rbx # encoding: [0x5b]
+; WIN64-NEXT: jmp foo # TAILCALL
+; WIN64-NEXT: # encoding: [0xeb,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1
+; WIN64-NEXT: .LBB1_2: # %bb2
+; WIN64-NEXT: nop # encoding: [0x90]
+; WIN64-NEXT: popq %rbx # encoding: [0x5b]
+; WIN64-NEXT: jmp bar # TAILCALL
+; WIN64-NEXT: # encoding: [0xeb,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1
+; WIN64-NEXT: .seh_handlerdata
+; WIN64-NEXT: .text
+; WIN64-NEXT: .seh_endproc
entry:
; Force %ebx to be spilled on the stack, so that this is no longer
; a "leaf" function on Win64.
tail call void @bar()
ret void
-; CHECK-LABEL: f_non_leaf:
-; WIN64-NOT: je foo
-; WIN64-NOT: jne bar
-; WIN64: jne
-; WIN64: jmp foo
-; WIN64: jmp bar
}
declare x86_thiscallcc zeroext i1 @baz(i8*, i32)
define x86_thiscallcc zeroext i1 @BlockPlacementTest(i8* %this, i32 %x) optsize {
+; CHECK32-LABEL: BlockPlacementTest:
+; CHECK32: # %bb.0: # %entry
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04]
+; CHECK32-NEXT: testb $42, %dl # encoding: [0xf6,0xc2,0x2a]
+; CHECK32-NEXT: je .LBB2_1 # encoding: [0x74,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB2_1-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.2: # %land.rhs
+; CHECK32-NEXT: movb $1, %al # encoding: [0xb0,0x01]
+; CHECK32-NEXT: testb $44, %dl # encoding: [0xf6,0xc2,0x2c]
+; CHECK32-NEXT: je baz # TAILCALL
+; CHECK32-NEXT: # encoding: [0x74,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: baz-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.3: # %land.end
+; CHECK32-NEXT: # kill: def $al killed $al killed $eax
+; CHECK32-NEXT: retl $4 # encoding: [0xc2,0x04,0x00]
+; CHECK32-NEXT: .LBB2_1:
+; CHECK32-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK32-NEXT: # kill: def $al killed $al killed $eax
+; CHECK32-NEXT: retl $4 # encoding: [0xc2,0x04,0x00]
+;
+; CHECK64-LABEL: BlockPlacementTest:
+; CHECK64: # %bb.0: # %entry
+; CHECK64-NEXT: testb $42, %sil # encoding: [0x40,0xf6,0xc6,0x2a]
+; CHECK64-NEXT: je .LBB2_1 # encoding: [0x74,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB2_1-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.2: # %land.rhs
+; CHECK64-NEXT: movb $1, %al # encoding: [0xb0,0x01]
+; CHECK64-NEXT: testb $44, %sil # encoding: [0x40,0xf6,0xc6,0x2c]
+; CHECK64-NEXT: je baz # TAILCALL
+; CHECK64-NEXT: # encoding: [0x74,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: baz-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.3: # %land.end
+; CHECK64-NEXT: # kill: def $al killed $al killed $eax
+; CHECK64-NEXT: retq # encoding: [0xc3]
+; CHECK64-NEXT: .LBB2_1:
+; CHECK64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK64-NEXT: # kill: def $al killed $al killed $eax
+; CHECK64-NEXT: retq # encoding: [0xc3]
+;
+; WIN64-LABEL: BlockPlacementTest:
+; WIN64: # %bb.0: # %entry
+; WIN64-NEXT: testb $42, %dl # encoding: [0xf6,0xc2,0x2a]
+; WIN64-NEXT: je .LBB2_1 # encoding: [0x74,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB2_1-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.2: # %land.rhs
+; WIN64-NEXT: movb $1, %al # encoding: [0xb0,0x01]
+; WIN64-NEXT: testb $44, %dl # encoding: [0xf6,0xc2,0x2c]
+; WIN64-NEXT: je baz # TAILCALL
+; WIN64-NEXT: # encoding: [0x74,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: baz-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.3: # %land.end
+; WIN64-NEXT: # kill: def $al killed $al killed $eax
+; WIN64-NEXT: retq # encoding: [0xc3]
+; WIN64-NEXT: .LBB2_1:
+; WIN64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; WIN64-NEXT: # kill: def $al killed $al killed $eax
+; WIN64-NEXT: retq # encoding: [0xc3]
entry:
%and = and i32 %x, 42
%tobool = icmp eq i32 %and, 0
; Make sure machine block placement isn't confused by the conditional tail call,
; but sees that it can fall through to the next block.
-; CHECK-LABEL: BlockPlacementTest
-; CHECK: je baz
-; CHECK-NOT: xor
-; CHECK: ret
}
declare zeroext i1 @_Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_(i8*, i8*)
define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly dereferenceable(8) %s) minsize {
-; CHECK-LABEL: pr31257
+; CHECK32-LABEL: pr31257:
+; CHECK32: # %bb.0: # %entry
+; CHECK32-NEXT: pushl %ebp # encoding: [0x55]
+; CHECK32-NEXT: .cfi_def_cfa_offset 8
+; CHECK32-NEXT: pushl %ebx # encoding: [0x53]
+; CHECK32-NEXT: .cfi_def_cfa_offset 12
+; CHECK32-NEXT: pushl %edi # encoding: [0x57]
+; CHECK32-NEXT: .cfi_def_cfa_offset 16
+; CHECK32-NEXT: pushl %esi # encoding: [0x56]
+; CHECK32-NEXT: .cfi_def_cfa_offset 20
+; CHECK32-NEXT: subl $12, %esp # encoding: [0x83,0xec,0x0c]
+; CHECK32-NEXT: .cfi_def_cfa_offset 32
+; CHECK32-NEXT: .cfi_offset %esi, -20
+; CHECK32-NEXT: .cfi_offset %edi, -16
+; CHECK32-NEXT: .cfi_offset %ebx, -12
+; CHECK32-NEXT: .cfi_offset %ebp, -8
+; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x20]
+; CHECK32-NEXT: movl (%eax), %eax # encoding: [0x8b,0x00]
+; CHECK32-NEXT: movl -24(%eax), %edx # encoding: [0x8b,0x50,0xe8]
+; CHECK32-NEXT: leal (%eax,%edx), %ebp # encoding: [0x8d,0x2c,0x10]
+; CHECK32-NEXT: xorl %ebx, %ebx # encoding: [0x31,0xdb]
+; CHECK32-NEXT: pushl $2 # encoding: [0x6a,0x02]
+; CHECK32-NEXT: .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT: popl %esi # encoding: [0x5e]
+; CHECK32-NEXT: .cfi_adjust_cfa_offset -4
+; CHECK32-NEXT: xorl %edi, %edi # encoding: [0x31,0xff]
+; CHECK32-NEXT: incl %edi # encoding: [0x47]
+; CHECK32-NEXT: jmp .LBB3_1 # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1
+; CHECK32-NEXT: .LBB3_2: # %for.body
+; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK32-NEXT: cmpl $2, %ebx # encoding: [0x83,0xfb,0x02]
+; CHECK32-NEXT: je .LBB3_11 # encoding: [0x74,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.3: # %for.body
+; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK32-NEXT: cmpl $1, %ebx # encoding: [0x83,0xfb,0x01]
+; CHECK32-NEXT: je .LBB3_9 # encoding: [0x74,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.4: # %for.body
+; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK32-NEXT: testl %ebx, %ebx # encoding: [0x85,0xdb]
+; CHECK32-NEXT: jne .LBB3_10 # encoding: [0x75,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.5: # %sw.bb
+; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK32-NEXT: movzbl (%eax), %ecx # encoding: [0x0f,0xb6,0x08]
+; CHECK32-NEXT: cmpl $43, %ecx # encoding: [0x83,0xf9,0x2b]
+; CHECK32-NEXT: movl %edi, %ebx # encoding: [0x89,0xfb]
+; CHECK32-NEXT: je .LBB3_10 # encoding: [0x74,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.6: # %sw.bb
+; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK32-NEXT: cmpb $45, %cl # encoding: [0x80,0xf9,0x2d]
+; CHECK32-NEXT: movl %edi, %ebx # encoding: [0x89,0xfb]
+; CHECK32-NEXT: je .LBB3_10 # encoding: [0x74,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; CHECK32-NEXT: jmp .LBB3_7 # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1
+; CHECK32-NEXT: .LBB3_11: # %sw.bb22
+; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK32-NEXT: movzbl (%eax), %ecx # encoding: [0x0f,0xb6,0x08]
+; CHECK32-NEXT: addl $-48, %ecx # encoding: [0x83,0xc1,0xd0]
+; CHECK32-NEXT: cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a]
+; CHECK32-NEXT: movl %esi, %ebx # encoding: [0x89,0xf3]
+; CHECK32-NEXT: jb .LBB3_10 # encoding: [0x72,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; CHECK32-NEXT: jmp .LBB3_12 # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1
+; CHECK32-NEXT: .LBB3_9: # %sw.bb14
+; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK32-NEXT: movzbl (%eax), %ecx # encoding: [0x0f,0xb6,0x08]
+; CHECK32-NEXT: .LBB3_7: # %if.else
+; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK32-NEXT: addl $-48, %ecx # encoding: [0x83,0xc1,0xd0]
+; CHECK32-NEXT: cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a]
+; CHECK32-NEXT: movl %esi, %ebx # encoding: [0x89,0xf3]
+; CHECK32-NEXT: jae .LBB3_8 # encoding: [0x73,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1
+; CHECK32-NEXT: .LBB3_10: # %for.inc
+; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK32-NEXT: incl %eax # encoding: [0x40]
+; CHECK32-NEXT: decl %edx # encoding: [0x4a]
+; CHECK32-NEXT: .LBB3_1: # %for.cond
+; CHECK32-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK32-NEXT: testl %edx, %edx # encoding: [0x85,0xd2]
+; CHECK32-NEXT: jne .LBB3_2 # encoding: [0x75,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_2-1, kind: FK_PCRel_1
+; CHECK32-NEXT: # %bb.13:
+; CHECK32-NEXT: cmpl $2, %ebx # encoding: [0x83,0xfb,0x02]
+; CHECK32-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK32-NEXT: jmp .LBB3_14 # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_14-1, kind: FK_PCRel_1
+; CHECK32-NEXT: .LBB3_8:
+; CHECK32-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK32-NEXT: .LBB3_14: # %cleanup.thread
+; CHECK32-NEXT: # kill: def $al killed $al killed $eax
+; CHECK32-NEXT: addl $12, %esp # encoding: [0x83,0xc4,0x0c]
+; CHECK32-NEXT: .cfi_def_cfa_offset 20
+; CHECK32-NEXT: .LBB3_15: # %cleanup.thread
+; CHECK32-NEXT: popl %esi # encoding: [0x5e]
+; CHECK32-NEXT: .cfi_def_cfa_offset 16
+; CHECK32-NEXT: popl %edi # encoding: [0x5f]
+; CHECK32-NEXT: .cfi_def_cfa_offset 12
+; CHECK32-NEXT: popl %ebx # encoding: [0x5b]
+; CHECK32-NEXT: .cfi_def_cfa_offset 8
+; CHECK32-NEXT: popl %ebp # encoding: [0x5d]
+; CHECK32-NEXT: .cfi_def_cfa_offset 4
+; CHECK32-NEXT: retl # encoding: [0xc3]
+; CHECK32-NEXT: .LBB3_12: # %if.else28
+; CHECK32-NEXT: .cfi_def_cfa_offset 32
+; CHECK32-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08]
+; CHECK32-NEXT: .cfi_adjust_cfa_offset 8
+; CHECK32-NEXT: pushl %ebp # encoding: [0x55]
+; CHECK32-NEXT: .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT: pushl %eax # encoding: [0x50]
+; CHECK32-NEXT: .cfi_adjust_cfa_offset 4
+; CHECK32-NEXT: calll _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_ # encoding: [0xe8,A,A,A,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_-4, kind: FK_PCRel_4
+; CHECK32-NEXT: addl $28, %esp # encoding: [0x83,0xc4,0x1c]
+; CHECK32-NEXT: .cfi_adjust_cfa_offset -28
+; CHECK32-NEXT: jmp .LBB3_15 # encoding: [0xeb,A]
+; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_15-1, kind: FK_PCRel_1
+;
+; CHECK64-LABEL: pr31257:
+; CHECK64: # %bb.0: # %entry
+; CHECK64-NEXT: movq (%rdi), %rdi # encoding: [0x48,0x8b,0x3f]
+; CHECK64-NEXT: movq -24(%rdi), %rax # encoding: [0x48,0x8b,0x47,0xe8]
+; CHECK64-NEXT: leaq (%rdi,%rax), %rsi # encoding: [0x48,0x8d,0x34,0x07]
+; CHECK64-NEXT: xorl %ecx, %ecx # encoding: [0x31,0xc9]
+; CHECK64-NEXT: pushq $2 # encoding: [0x6a,0x02]
+; CHECK64-NEXT: .cfi_adjust_cfa_offset 8
+; CHECK64-NEXT: popq %r9 # encoding: [0x41,0x59]
+; CHECK64-NEXT: .cfi_adjust_cfa_offset -8
+; CHECK64-NEXT: pushq $1 # encoding: [0x6a,0x01]
+; CHECK64-NEXT: .cfi_adjust_cfa_offset 8
+; CHECK64-NEXT: popq %r8 # encoding: [0x41,0x58]
+; CHECK64-NEXT: .cfi_adjust_cfa_offset -8
+; CHECK64-NEXT: jmp .LBB3_1 # encoding: [0xeb,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1
+; CHECK64-NEXT: .LBB3_2: # %for.body
+; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK64-NEXT: cmpl $2, %ecx # encoding: [0x83,0xf9,0x02]
+; CHECK64-NEXT: je .LBB3_11 # encoding: [0x74,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.3: # %for.body
+; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK64-NEXT: cmpl $1, %ecx # encoding: [0x83,0xf9,0x01]
+; CHECK64-NEXT: je .LBB3_10 # encoding: [0x74,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.4: # %for.body
+; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK64-NEXT: testl %ecx, %ecx # encoding: [0x85,0xc9]
+; CHECK64-NEXT: jne .LBB3_12 # encoding: [0x75,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.5: # %sw.bb
+; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK64-NEXT: movzbl (%rdi), %edx # encoding: [0x0f,0xb6,0x17]
+; CHECK64-NEXT: cmpl $43, %edx # encoding: [0x83,0xfa,0x2b]
+; CHECK64-NEXT: movl %r8d, %ecx # encoding: [0x44,0x89,0xc1]
+; CHECK64-NEXT: je .LBB3_12 # encoding: [0x74,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.6: # %sw.bb
+; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK64-NEXT: cmpb $45, %dl # encoding: [0x80,0xfa,0x2d]
+; CHECK64-NEXT: movl %r8d, %ecx # encoding: [0x44,0x89,0xc1]
+; CHECK64-NEXT: je .LBB3_12 # encoding: [0x74,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.7: # %if.else
+; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK64-NEXT: addl $-48, %edx # encoding: [0x83,0xc2,0xd0]
+; CHECK64-NEXT: cmpl $10, %edx # encoding: [0x83,0xfa,0x0a]
+; CHECK64-NEXT: jmp .LBB3_8 # encoding: [0xeb,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1
+; CHECK64-NEXT: .LBB3_10: # %sw.bb14
+; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK64-NEXT: movzbl (%rdi), %ecx # encoding: [0x0f,0xb6,0x0f]
+; CHECK64-NEXT: addl $-48, %ecx # encoding: [0x83,0xc1,0xd0]
+; CHECK64-NEXT: cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a]
+; CHECK64-NEXT: .LBB3_8: # %if.else
+; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK64-NEXT: movl %r9d, %ecx # encoding: [0x44,0x89,0xc9]
+; CHECK64-NEXT: jb .LBB3_12 # encoding: [0x72,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1
+; CHECK64-NEXT: jmp .LBB3_9 # encoding: [0xeb,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
+; CHECK64-NEXT: .LBB3_11: # %sw.bb22
+; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK64-NEXT: movzbl (%rdi), %ecx # encoding: [0x0f,0xb6,0x0f]
+; CHECK64-NEXT: addl $-48, %ecx # encoding: [0x83,0xc1,0xd0]
+; CHECK64-NEXT: cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a]
+; CHECK64-NEXT: movl %r9d, %ecx # encoding: [0x44,0x89,0xc9]
+; CHECK64-NEXT: jae _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_ # TAILCALL
+; CHECK64-NEXT: # encoding: [0x73,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_-1, kind: FK_PCRel_1
+; CHECK64-NEXT: .LBB3_12: # %for.inc
+; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK64-NEXT: incq %rdi # encoding: [0x48,0xff,0xc7]
+; CHECK64-NEXT: decq %rax # encoding: [0x48,0xff,0xc8]
+; CHECK64-NEXT: .LBB3_1: # %for.cond
+; CHECK64-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK64-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0]
+; CHECK64-NEXT: jne .LBB3_2 # encoding: [0x75,A]
+; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_2-1, kind: FK_PCRel_1
+; CHECK64-NEXT: # %bb.13:
+; CHECK64-NEXT: cmpl $2, %ecx # encoding: [0x83,0xf9,0x02]
+; CHECK64-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; CHECK64-NEXT: # kill: def $al killed $al killed $eax
+; CHECK64-NEXT: retq # encoding: [0xc3]
+; CHECK64-NEXT: .LBB3_9:
+; CHECK64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; CHECK64-NEXT: # kill: def $al killed $al killed $eax
+; CHECK64-NEXT: retq # encoding: [0xc3]
+;
+; WIN64-LABEL: pr31257:
+; WIN64: # %bb.0: # %entry
+; WIN64-NEXT: movq (%rcx), %rcx # encoding: [0x48,0x8b,0x09]
+; WIN64-NEXT: movq -24(%rcx), %r8 # encoding: [0x4c,0x8b,0x41,0xe8]
+; WIN64-NEXT: leaq (%rcx,%r8), %rdx # encoding: [0x4a,0x8d,0x14,0x01]
+; WIN64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; WIN64-NEXT: jmp .LBB3_1 # encoding: [0xeb,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1
+; WIN64-NEXT: .LBB3_2: # %for.body
+; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; WIN64-NEXT: cmpl $2, %eax # encoding: [0x83,0xf8,0x02]
+; WIN64-NEXT: je .LBB3_10 # encoding: [0x74,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.3: # %for.body
+; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; WIN64-NEXT: cmpl $1, %eax # encoding: [0x83,0xf8,0x01]
+; WIN64-NEXT: je .LBB3_9 # encoding: [0x74,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.4: # %for.body
+; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; WIN64-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; WIN64-NEXT: jne .LBB3_11 # encoding: [0x75,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.5: # %sw.bb
+; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; WIN64-NEXT: movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09]
+; WIN64-NEXT: cmpl $43, %r9d # encoding: [0x41,0x83,0xf9,0x2b]
+; WIN64-NEXT: movl $1, %eax # encoding: [0xb8,0x01,0x00,0x00,0x00]
+; WIN64-NEXT: je .LBB3_11 # encoding: [0x74,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.6: # %sw.bb
+; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; WIN64-NEXT: cmpb $45, %r9b # encoding: [0x41,0x80,0xf9,0x2d]
+; WIN64-NEXT: je .LBB3_11 # encoding: [0x74,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
+; WIN64-NEXT: jmp .LBB3_7 # encoding: [0xeb,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1
+; WIN64-NEXT: .LBB3_9: # %sw.bb14
+; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; WIN64-NEXT: movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09]
+; WIN64-NEXT: .LBB3_7: # %if.else
+; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; WIN64-NEXT: addl $-48, %r9d # encoding: [0x41,0x83,0xc1,0xd0]
+; WIN64-NEXT: movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00]
+; WIN64-NEXT: cmpl $10, %r9d # encoding: [0x41,0x83,0xf9,0x0a]
+; WIN64-NEXT: jb .LBB3_11 # encoding: [0x72,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
+; WIN64-NEXT: jmp .LBB3_8 # encoding: [0xeb,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1
+; WIN64-NEXT: .LBB3_10: # %sw.bb22
+; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; WIN64-NEXT: movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09]
+; WIN64-NEXT: addl $-48, %r9d # encoding: [0x41,0x83,0xc1,0xd0]
+; WIN64-NEXT: movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00]
+; WIN64-NEXT: cmpl $10, %r9d # encoding: [0x41,0x83,0xf9,0x0a]
+; WIN64-NEXT: jae _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_ # TAILCALL
+; WIN64-NEXT: # encoding: [0x73,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_-1, kind: FK_PCRel_1
+; WIN64-NEXT: .LBB3_11: # %for.inc
+; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; WIN64-NEXT: incq %rcx # encoding: [0x48,0xff,0xc1]
+; WIN64-NEXT: decq %r8 # encoding: [0x49,0xff,0xc8]
+; WIN64-NEXT: .LBB3_1: # %for.cond
+; WIN64-NEXT: # =>This Inner Loop Header: Depth=1
+; WIN64-NEXT: testq %r8, %r8 # encoding: [0x4d,0x85,0xc0]
+; WIN64-NEXT: jne .LBB3_2 # encoding: [0x75,A]
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_2-1, kind: FK_PCRel_1
+; WIN64-NEXT: # %bb.12:
+; WIN64-NEXT: cmpl $2, %eax # encoding: [0x83,0xf8,0x02]
+; WIN64-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; WIN64-NEXT: # kill: def $al killed $al killed $eax
+; WIN64-NEXT: retq # encoding: [0xc3]
+; WIN64-NEXT: .LBB3_8:
+; WIN64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; WIN64-NEXT: # kill: def $al killed $al killed $eax
+; WIN64-NEXT: retq # encoding: [0xc3]
entry:
%_M_p.i.i = getelementptr inbounds %"class.std::basic_string", %"class.std::basic_string"* %s, i64 0, i32 0, i32 0
%0 = load i8*, i8** %_M_p.i.i, align 8
; Make sure Machine Copy Propagation doesn't delete the mov to %ecx because it
; thinks the conditional tail call clobbers it.
-; CHECK64-LABEL: .LBB3_11:
-; CHECK64: movzbl (%rdi), %ecx
-; CHECK64-NEXT: addl $-48, %ecx
-; CHECK64-NEXT: cmpl $10, %ecx
-; CHECK64-NEXT: movl %r9d, %ecx
-; CHECK64-NEXT: jae _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEE
if.else28: ; preds = %sw.bb22
%call34 = tail call zeroext i1 @_Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_(i8* nonnull %it.sroa.0.0, i8* %add.ptr.i56)
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -asm-verbose=false -post-RA-scheduler=true | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -post-RA-scheduler=true | FileCheck %s
declare void @bar(i32)
declare void @car(i32)
; BranchFolding should tail-merge the stores since they all precede
; direct branches to the same place.
-; CHECK-LABEL: tail_merge_me:
-; CHECK-NOT: GHJK
-; CHECK: movl $0, GHJK(%rip)
-; CHECK-NEXT: movl $1, HABC(%rip)
-; CHECK-NOT: GHJK
-
define void @tail_merge_me() nounwind {
+; CHECK-LABEL: tail_merge_me:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq qux
+; CHECK-NEXT: testb $1, %al
+; CHECK-NEXT: je .LBB0_1
+; CHECK-NEXT: # %bb.6: # %A
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: callq bar
+; CHECK-NEXT: jmp .LBB0_4
+; CHECK-NEXT: .LBB0_1: # %next
+; CHECK-NEXT: callq qux
+; CHECK-NEXT: testb $1, %al
+; CHECK-NEXT: je .LBB0_3
+; CHECK-NEXT: # %bb.2: # %B
+; CHECK-NEXT: movl $1, %edi
+; CHECK-NEXT: callq car
+; CHECK-NEXT: jmp .LBB0_4
+; CHECK-NEXT: .LBB0_3: # %C
+; CHECK-NEXT: movl $2, %edi
+; CHECK-NEXT: callq dar
+; CHECK-NEXT: .LBB0_4: # %M
+; CHECK-NEXT: movl $0, {{.*}}(%rip)
+; CHECK-NEXT: movl $1, {{.*}}(%rip)
+; CHECK-NEXT: callq qux
+; CHECK-NEXT: testb $1, %al
+; CHECK-NEXT: je .LBB0_5
+; CHECK-NEXT: # %bb.7: # %return
+; CHECK-NEXT: movl $1000, %edi # imm = 0x3E8
+; CHECK-NEXT: callq ear
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB0_5: # %altret
+; CHECK-NEXT: movl $1001, %edi # imm = 0x3E9
+; CHECK-NEXT: callq far
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
entry:
%a = call i1 @qux()
br i1 %a, label %A, label %next
; BranchFolding should tail-duplicate the indirect jump to avoid
; redundant branching.
-; CHECK-LABEL: tail_duplicate_me:
-; CHECK: movl $0, GHJK(%rip)
-; CHECK-NEXT: jmpq *%r
-; CHECK: movl $0, GHJK(%rip)
-; CHECK-NEXT: jmpq *%r
-; CHECK: movl $0, GHJK(%rip)
-; CHECK-NEXT: jmpq *%r
-
define void @tail_duplicate_me() nounwind {
+; CHECK-LABEL: tail_duplicate_me:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq qux
+; CHECK-NEXT: movl $.Ltmp0, %edi
+; CHECK-NEXT: movl $.Ltmp1, %esi
+; CHECK-NEXT: movl %eax, %ebx
+; CHECK-NEXT: callq choose
+; CHECK-NEXT: movq %rax, %r14
+; CHECK-NEXT: testb $1, %bl
+; CHECK-NEXT: je .LBB1_1
+; CHECK-NEXT: # %bb.7: # %A
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: callq bar
+; CHECK-NEXT: movl $0, {{.*}}(%rip)
+; CHECK-NEXT: jmpq *%r14
+; CHECK-NEXT: .Ltmp0: # Block address taken
+; CHECK-NEXT: .LBB1_4: # %return
+; CHECK-NEXT: movl $1000, %edi # imm = 0x3E8
+; CHECK-NEXT: callq ear
+; CHECK-NEXT: jmp .LBB1_5
+; CHECK-NEXT: .LBB1_1: # %next
+; CHECK-NEXT: callq qux
+; CHECK-NEXT: testb $1, %al
+; CHECK-NEXT: je .LBB1_3
+; CHECK-NEXT: # %bb.2: # %B
+; CHECK-NEXT: movl $1, %edi
+; CHECK-NEXT: callq car
+; CHECK-NEXT: movl $0, {{.*}}(%rip)
+; CHECK-NEXT: jmpq *%r14
+; CHECK-NEXT: .Ltmp1: # Block address taken
+; CHECK-NEXT: .LBB1_6: # %altret
+; CHECK-NEXT: movl $1001, %edi # imm = 0x3E9
+; CHECK-NEXT: callq far
+; CHECK-NEXT: .LBB1_5: # %return
+; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB1_3: # %C
+; CHECK-NEXT: movl $2, %edi
+; CHECK-NEXT: callq dar
+; CHECK-NEXT: movl $0, {{.*}}(%rip)
+; CHECK-NEXT: jmpq *%r14
entry:
%a = call i1 @qux()
%c = call i8* @choose(i8* blockaddress(@tail_duplicate_me, %return),
; BranchFolding shouldn't try to merge the tails of two blocks
; with only a branch in common, regardless of the fallthrough situation.
-; CHECK-LABEL: dont_merge_oddly:
-; CHECK-NOT: ret
-; CHECK: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}}
-; CHECK-NEXT: jbe .LBB2_3
-; CHECK-NEXT: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}}
-; CHECK-NEXT: ja .LBB2_4
-; CHECK-NEXT: .LBB2_2:
-; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB2_3:
-; CHECK-NEXT: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}}
-; CHECK-NEXT: jbe .LBB2_2
-; CHECK-NEXT: .LBB2_4:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: ret
-
define i1 @dont_merge_oddly(float* %result) nounwind {
+; CHECK-LABEL: dont_merge_oddly:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: ucomiss %xmm1, %xmm2
+; CHECK-NEXT: jbe .LBB2_3
+; CHECK-NEXT: # %bb.1: # %bb
+; CHECK-NEXT: ucomiss %xmm0, %xmm1
+; CHECK-NEXT: ja .LBB2_4
+; CHECK-NEXT: .LBB2_2: # %bb30
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB2_3: # %bb21
+; CHECK-NEXT: ucomiss %xmm0, %xmm2
+; CHECK-NEXT: jbe .LBB2_2
+; CHECK-NEXT: .LBB2_4: # %bb26
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: retq
entry:
%tmp4 = getelementptr float, float* %result, i32 2
%tmp5 = load float, float* %tmp4, align 4
; Do any-size tail-merging when two candidate blocks will both require
; an unconditional jump to complete a two-way conditional branch.
-
-; CHECK-LABEL: c_expand_expr_stmt:
;
; This test only works when register allocation happens to use %rax for both
; load addresses.
-;
-; CHE: jmp .LBB3_11
-; CHE-NEXT: .LBB3_9:
-; CHE-NEXT: movq 8(%rax), %rax
-; CHE-NEXT: xorl %edx, %edx
-; CHE-NEXT: movb 16(%rax), %al
-; CHE-NEXT: cmpb $16, %al
-; CHE-NEXT: je .LBB3_11
-; CHE-NEXT: cmpb $23, %al
-; CHE-NEXT: jne .LBB3_14
-; CHE-NEXT: .LBB3_11:
%0 = type { %struct.rtx_def* }
%struct.lang_decl = type opaque
%union.tree_node = type { %struct.tree_decl }
define fastcc void @c_expand_expr_stmt(%union.tree_node* %expr) nounwind {
+; CHECK-LABEL: c_expand_expr_stmt:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne .LBB3_17
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: movb 0, %bl
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne .LBB3_16
+; CHECK-NEXT: # %bb.2: # %bb.i
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je .LBB3_16
+; CHECK-NEXT: # %bb.3: # %lvalue_p.exit
+; CHECK-NEXT: movq 0, %rax
+; CHECK-NEXT: movzbl (%rax), %ecx
+; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: je .LBB3_12
+; CHECK-NEXT: # %bb.4: # %lvalue_p.exit
+; CHECK-NEXT: cmpl $2, %ecx
+; CHECK-NEXT: jne .LBB3_5
+; CHECK-NEXT: # %bb.6: # %bb.i1
+; CHECK-NEXT: movq 32(%rax), %rax
+; CHECK-NEXT: movzbl 16(%rax), %ecx
+; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: je .LBB3_10
+; CHECK-NEXT: # %bb.7: # %bb.i1
+; CHECK-NEXT: cmpl $2, %ecx
+; CHECK-NEXT: jne .LBB3_8
+; CHECK-NEXT: # %bb.9: # %bb.i.i
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: callq lvalue_p
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je .LBB3_15
+; CHECK-NEXT: jmp .LBB3_17
+; CHECK-NEXT: .LBB3_16: # %bb1
+; CHECK-NEXT: cmpb $23, %bl
+; CHECK-NEXT: .LBB3_17: # %bb3
+; CHECK-NEXT: .LBB3_12: # %bb2.i3
+; CHECK-NEXT: movq 8(%rax), %rax
+; CHECK-NEXT: movb 16(%rax), %cl
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpb $23, %cl
+; CHECK-NEXT: je .LBB3_14
+; CHECK-NEXT: # %bb.13: # %bb2.i3
+; CHECK-NEXT: cmpb $16, %cl
+; CHECK-NEXT: je .LBB3_14
+; CHECK-NEXT: jmp .LBB3_17
+; CHECK-NEXT: .LBB3_5:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je .LBB3_15
+; CHECK-NEXT: jmp .LBB3_17
+; CHECK-NEXT: .LBB3_10: # %bb2.i.i2
+; CHECK-NEXT: movq 8(%rax), %rax
+; CHECK-NEXT: movb 16(%rax), %cl
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpb $16, %cl
+; CHECK-NEXT: je .LBB3_14
+; CHECK-NEXT: # %bb.11: # %bb2.i.i2
+; CHECK-NEXT: cmpb $23, %cl
+; CHECK-NEXT: je .LBB3_14
+; CHECK-NEXT: jmp .LBB3_17
+; CHECK-NEXT: .LBB3_8:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: .LBB3_14: # %lvalue_p.exit4
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne .LBB3_17
+; CHECK-NEXT: .LBB3_15: # %lvalue_p.exit4
+; CHECK-NEXT: testb %bl, %bl
entry:
%tmp4 = load i8, i8* null, align 8 ; <i8> [#uses=3]
switch i8 %tmp4, label %bb3 [
; instructions are involved. This function should have only
; one ret instruction.
-; CHECK-LABEL: foo:
-; CHECK: callq func
-; CHECK-NEXT: popq
-; CHECK-NEXT: .LBB4_2:
-; CHECK-NEXT: ret
-
define void @foo(i1* %V) nounwind {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: je .LBB4_2
+; CHECK-NEXT: # %bb.1: # %bb
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq func
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .LBB4_2: # %return
+; CHECK-NEXT: retq
entry:
%t0 = icmp eq i1* %V, null
br i1 %t0, label %return, label %bb
; one - One instruction may be tail-duplicated even with optsize.
-; CHECK-LABEL: one:
-; CHECK: j{{.*}} tail_call_me
-; CHECK: j{{.*}} tail_call_me
-
@XYZ = external global i32
declare void @tail_call_me()
define void @one(i32 %v) nounwind optsize {
+; CHECK-LABEL: one:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: je .LBB5_3
+; CHECK-NEXT: # %bb.1: # %bby
+; CHECK-NEXT: cmpl $16, %edi
+; CHECK-NEXT: je .LBB5_4
+; CHECK-NEXT: # %bb.2: # %bb7
+; CHECK-NEXT: jmp tail_call_me # TAILCALL
+; CHECK-NEXT: .LBB5_3: # %bbx
+; CHECK-NEXT: cmpl $128, %edi
+; CHECK-NEXT: jne tail_call_me # TAILCALL
+; CHECK-NEXT: .LBB5_4: # %return
+; CHECK-NEXT: retq
entry:
%0 = icmp eq i32 %v, 0
br i1 %0, label %bbx, label %bby
; tail instead of one. This is too much to be merged, given
; the optsize attribute.
-; CHECK-LABEL: two:
-; CHECK-NOT: XYZ
-; CHECK: ret
-; CHECK: movl $0, XYZ(%rip)
-; CHECK: movl $1, XYZ(%rip)
-; CHECK-NOT: XYZ
-
define void @two() nounwind optsize {
+; CHECK-LABEL: two:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je .LBB6_1
+; CHECK-NEXT: # %bb.2: # %return
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB6_1: # %bb7
+; CHECK-NEXT: movl $0, {{.*}}(%rip)
+; CHECK-NEXT: movl $1, {{.*}}(%rip)
entry:
%0 = icmp eq i32 undef, 0
br i1 %0, label %bbx, label %bby
; two_minsize - Same as two, but with minsize instead of optsize.
-; CHECK-LABEL: two_minsize:
-; CHECK-NOT: XYZ
-; CHECK: ret
-; CHECK: movl $0, XYZ(%rip)
-; CHECK: movl $1, XYZ(%rip)
-; CHECK-NOT: XYZ
-
define void @two_minsize() nounwind minsize {
+; CHECK-LABEL: two_minsize:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je .LBB7_1
+; CHECK-NEXT: # %bb.2: # %return
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB7_1: # %bb7
+; CHECK-NEXT: movl $0, {{.*}}(%rip)
+; CHECK-NEXT: movl $1, {{.*}}(%rip)
entry:
%0 = icmp eq i32 undef, 0
br i1 %0, label %bbx, label %bby
; two_nosize - Same as two, but without the optsize attribute.
; Now two instructions are enough to be tail-duplicated.
-; CHECK-LABEL: two_nosize:
-; CHECK: movl $0, XYZ(%rip)
-; CHECK: jmp tail_call_me
-; CHECK: movl $0, XYZ(%rip)
-; CHECK: jmp tail_call_me
-
define void @two_nosize() nounwind {
+; CHECK-LABEL: two_nosize:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne .LBB8_3
+; CHECK-NEXT: # %bb.1: # %bby
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne .LBB8_4
+; CHECK-NEXT: # %bb.2: # %bb7
+; CHECK-NEXT: movl $0, {{.*}}(%rip)
+; CHECK-NEXT: jmp tail_call_me # TAILCALL
+; CHECK-NEXT: .LBB8_3: # %bbx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je .LBB8_5
+; CHECK-NEXT: .LBB8_4: # %return
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB8_5: # %bb12
+; CHECK-NEXT: movl $0, {{.*}}(%rip)
+; CHECK-NEXT: jmp tail_call_me # TAILCALL
entry:
%0 = icmp eq i32 undef, 0
br i1 %0, label %bbx, label %bby
; Tail-merging should merge the two ret instructions since one side
; can fall through into the ret and the other side has to branch anyway.
-; CHECK-LABEL: TESTE:
-; CHECK: ret
-; CHECK-NOT: ret
-; CHECK: size TESTE
-
define i64 @TESTE(i64 %parami, i64 %paraml) nounwind readnone {
+; CHECK-LABEL: TESTE:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: movl $1, %eax
+; CHECK-NEXT: cmovgq %rdi, %rax
+; CHECK-NEXT: testq %rsi, %rsi
+; CHECK-NEXT: jle .LBB9_2
+; CHECK-NEXT: # %bb.1: # %bb.nph
+; CHECK-NEXT: imulq %rdi, %rsi
+; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: .LBB9_2: # %for.end
+; CHECK-NEXT: retq
entry:
%cmp = icmp slt i64 %parami, 1 ; <i1> [#uses=1]
%varx.0 = select i1 %cmp, i64 1, i64 %parami ; <i64> [#uses=1]
; out-of-line after the main return, so we should try to eliminate as many of
; them as possible.
-; CHECK-LABEL: merge_aborts:
-; CHECK-NOT: callq abort
-; CHECK: ret
-; CHECK: callq abort
-; CHECK-NOT: callq abort
-; CHECK: .Lfunc_end{{.*}}:
-
declare void @abort()
define void @merge_aborts() {
+; CHECK-LABEL: merge_aborts:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq qux
+; CHECK-NEXT: testb $1, %al
+; CHECK-NEXT: je .LBB10_5
+; CHECK-NEXT: # %bb.1: # %cont1
+; CHECK-NEXT: callq qux
+; CHECK-NEXT: testb $1, %al
+; CHECK-NEXT: je .LBB10_5
+; CHECK-NEXT: # %bb.2: # %cont2
+; CHECK-NEXT: callq qux
+; CHECK-NEXT: testb $1, %al
+; CHECK-NEXT: je .LBB10_5
+; CHECK-NEXT: # %bb.3: # %cont3
+; CHECK-NEXT: callq qux
+; CHECK-NEXT: testb $1, %al
+; CHECK-NEXT: je .LBB10_5
+; CHECK-NEXT: # %bb.4: # %cont4
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB10_5: # %abort1
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq abort
entry:
%c1 = call i1 @qux()
br i1 %c1, label %cont1, label %abort1
; Use alternating abort functions so that the blocks we wish to merge are not
; layout successors during branch folding.
-; CHECK-LABEL: merge_alternating_aborts:
-; CHECK-NOT: callq abort
-; CHECK: ret
-; CHECK: callq abort
-; CHECK: callq alt_abort
-; CHECK-NOT: callq abort
-; CHECK-NOT: callq alt_abort
-; CHECK: .Lfunc_end{{.*}}:
-
declare void @alt_abort()
define void @merge_alternating_aborts() {
+; CHECK-LABEL: merge_alternating_aborts:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq qux
+; CHECK-NEXT: testb $1, %al
+; CHECK-NEXT: je .LBB11_5
+; CHECK-NEXT: # %bb.1: # %cont1
+; CHECK-NEXT: callq qux
+; CHECK-NEXT: testb $1, %al
+; CHECK-NEXT: je .LBB11_6
+; CHECK-NEXT: # %bb.2: # %cont2
+; CHECK-NEXT: callq qux
+; CHECK-NEXT: testb $1, %al
+; CHECK-NEXT: je .LBB11_5
+; CHECK-NEXT: # %bb.3: # %cont3
+; CHECK-NEXT: callq qux
+; CHECK-NEXT: testb $1, %al
+; CHECK-NEXT: je .LBB11_6
+; CHECK-NEXT: # %bb.4: # %cont4
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB11_5: # %abort1
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq abort
+; CHECK-NEXT: .LBB11_6: # %abort2
+; CHECK-NEXT: callq alt_abort
entry:
%c1 = call i1 @qux()
br i1 %c1, label %cont1, label %abort1