}
void TargetARM32::lowerCall(const InstCall *Instr) {
- Operand *CallTarget = Instr->getCallTarget();
+ // Note: Keep original call target. This allows us to call the correct
+ // postamble helper, even if the CallTarget gets modified during lowering.
+ Operand *OrigCallTarget = Instr->getCallTarget();
+ Operand *CallTarget = OrigCallTarget;
if (Instr->isTargetHelperCall()) {
auto TargetHelperPreamble = ARM32HelpersPreamble.find(CallTarget);
if (TargetHelperPreamble != ARM32HelpersPreamble.end()) {
}
}
- // Allow ConstantRelocatable to be left alone as a direct call, but force
- // other constants like ConstantInteger32 to be in a register and make it an
- // indirect call.
- if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
- CallTarget = legalize(CallTarget, Legal_Reg);
- }
+ // Note: To allow far calls, even for constant relocatables, we force
+ // the call target into a register, and make an indirect call.
+ CallTarget = legalizeToReg(CallTarget);
// Copy arguments to be passed in registers to the appropriate registers.
for (auto &FPArg : FPArgs) {
}
if (Instr->isTargetHelperCall()) {
- auto TargetHelpersPostamble = ARM32HelpersPostamble.find(CallTarget);
+ auto TargetHelpersPostamble = ARM32HelpersPostamble.find(OrigCallTarget);
if (TargetHelpersPostamble != ARM32HelpersPostamble.end()) {
(this->*TargetHelpersPostamble->second)(Instr);
}
--- /dev/null
+; Show that we convert direct calls into indirect calls (to handle far
+; branches).
+
+; NOTE: We use -O2 to get rid of memory stores.
+
+; REQUIRES: allow_dump
+
+; Compile using standalone assembler.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 -allow-extern \
+; RUN: -reg-use r5 | FileCheck %s --check-prefix=ASM
+
+; Show bytes in assembled standalone code.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
+; RUN: --args -O2 -allow-extern -reg-use r5 | FileCheck %s --check-prefix=DIS
+
+; Compile using integrated assembler.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
+; RUN: -allow-extern -reg-use r5 | FileCheck %s --check-prefix=IASM
+
+; Show bytes in assembled integrated code.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
+; RUN: --args -O2 -allow-extern -reg-use r5 | FileCheck %s --check-prefix=DIS
+
+declare external void @doSomething()
+
+define internal void @callSomething() {
+; ASM-LABEL:callSomething:
+; DIS-LABEL:{{.+}} <callSomething>:
+; IASM-LABEL:callSomething:
+
+ call void @doSomething();
+
+; ASM: movw r5, #:lower16:doSomething
+; DIS: {{.+}}: e3005000
+; ASM-NOT: movw
+
+; ASM-NEXT: movt r5, #:upper16:doSomething
+; DIS-NEXT: {{.+}}: e3405000
+; ASM-NOT: movt
+
+; ASM-NEXT: blx r5
+; DIS-NEXT: {{.+}}: e12fff35
+; ASM-NOT: blx
+ ret void
+}
; ASM-NEXT: vpush {s22, s23}
; ASM-NEXT: push {lr}
-; DIS: 0: ed2daa01
-; DIS-NEXT: 4: ed2dba02
-; DIS-NEXT: 8: e52de004
+; DIS: {{.+}}: ed2daa01
+; DIS-NEXT: {{.+}}: ed2dba02
+; DIS-NEXT: {{.+}}: e52de004
; IASM-NOT: vpush
; IASM-NOT: push
; ASM-NEXT: vpop {s22, s23}
; ASM-NEXT: vpop {s20}
-; DIS: 40: e49de004
-; DIS-NEXT: 44: ecbdba02
-; DIS-NEXT: 48: ecbdaa01
+; DIS: {{.+}}: e49de004
+; DIS-NEXT: {{.+}}: ecbdba02
+; DIS-NEXT: {{.+}}: ecbdaa01
; IASM-NOT: pop
; IASM-NOT: vpop
; Compile using standalone assembler.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 -allow-extern \
-; RUN: | FileCheck %s --check-prefix=ASM
+; RUN: -reg-use r0,r1,r2,r3,r4,r5 | FileCheck %s --check-prefix=ASM
; Show bytes in assembled standalone code.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
-; RUN: --args -O2 -allow-extern | FileCheck %s --check-prefix=DIS
+; RUN: --args -O2 -allow-extern -reg-use r0,r1,r2,r3,r4,r5 \
+; RUN: | FileCheck %s --check-prefix=DIS
; Compile using integrated assembler.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
-; RUN: -allow-extern | FileCheck %s --check-prefix=IASM
+; RUN: -allow-extern -reg-use r0,r1,r2,r3,r4,r5 \
+; RUN: | FileCheck %s --check-prefix=IASM
; Show bytes in assembled integrated code.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
-; RUN: --args -O2 -allow-extern | FileCheck %s --check-prefix=DIS
+; RUN: --args -O2 -allow-extern -reg-use r0,r1,r2,r3,r4,r5 \
+; RUN: | FileCheck %s --check-prefix=DIS
declare external void @DoSomething()
define internal void @SinglePushPop() {
+; ASM-LABEL:SinglePushPop:
+; DIS-LABEL:{{.+}} <SinglePushPop>:
+; IASM-LABEL:SinglePushPop:
+
+; ASM: push {lr}
+; DIS: {{.+}} e52de004
+; IASM-NOT: push
+
call void @DoSomething();
ret void
-}
-; ASM-LABEL:SinglePushPop:
-; ASM-NEXT:.LSinglePushPop$__0:
-; ASM-NEXT: push {lr}
-; ASM-NEXT: sub sp, sp, #12
-; ASM-NEXT: bl DoSomething
-; ASM-NEXT: add sp, sp, #12
-; ASM-NEXT: pop {lr}
-; ASM-NEXT: # lr = def.pseudo
-; ASM-NEXT: bx lr
-
-; DIS-LABEL:00000000 <SinglePushPop>:
-; DIS-NEXT: 0: e52de004
-; DIS-NEXT: 4: e24dd00c
-; DIS-NEXT: 8: ebfffffe
-; DIS-NEXT: c: e28dd00c
-; DIS-NEXT: 10: e49de004
-; DIS-NEXT: 14: e12fff1e
+; ASM: pop {lr}
+; DIS: {{.+}} e49de004
+; IASM-NOT: pop
-; IASM-LABEL:SinglePushPop:
-; IASM-NEXT:.LSinglePushPop$__0:
-; IASM-NEXT: .byte 0x4
-; IASM-NEXT: .byte 0xe0
-; IASM-NEXT: .byte 0x2d
-; IASM-NEXT: .byte 0xe5
-
-; IASM-NEXT: .byte 0xc
-; IASM-NEXT: .byte 0xd0
-; IASM-NEXT: .byte 0x4d
-; IASM-NEXT: .byte 0xe2
-; IASM-NEXT: bl DoSomething @ .word ebfffffe
-; IASM-NEXT: .byte 0xc
-; IASM-NEXT: .byte 0xd0
-; IASM-NEXT: .byte 0x8d
-; IASM-NEXT: .byte 0xe2
-
-; IASM-NEXT: .byte 0x4
-; IASM-NEXT: .byte 0xe0
-; IASM-NEXT: .byte 0x9d
-; IASM-NEXT: .byte 0xe4
-
-; IASM: .byte 0x1e
-; IASM-NEXT: .byte 0xff
-; IASM-NEXT: .byte 0x2f
-; IASM-NEXT: .byte 0xe1
+}
; This test is based on taking advantage of the over-eager -O2
; register allocator that puts V1 and V2 into callee-save registers,
; requires the callee-save registers to be pushed/popped in the
; prolog/epilog.
define internal i32 @MultPushPop(i32 %v1, i32 %v2) {
+; ASM-LABEL:MultPushPop:
+; DIS-LABEL: {{.+}} <MultPushPop>:
+; IASM-LABEL:MultPushPop:
+; ASM: push {r4, r5, lr}
+; DIS: {{.+}}: e92d4030
+
+; IASM-NOT: push
+
+
call void @DoSomething();
%v3 = add i32 %v1, %v2
ret i32 %v3
-}
-; ASM-LABEL:MultPushPop:
-; ASM-NEXT:.LMultPushPop$__0:
-; ASM-NEXT: push {r4, r5, lr}
-; ASM-NEXT: sub sp, sp, #4
-; ASM-NEXT: mov r4, r0
-; ASM-NEXT: mov r5, r1
-; ASM-NEXT: bl DoSomething
-; ASM-NEXT: add r4, r4, r5
-; ASM-NEXT: mov r0, r4
-; ASM-NEXT: add sp, sp, #4
-; ASM-NEXT: pop {r4, r5, lr}
-; ASM-NEXT: # r4 = def.pseudo
-; ASM-NEXT: # r5 = def.pseudo
-; ASM-NEXT: # lr = def.pseudo
-; ASM-NEXT: bx lr
-
-; DIS-LABEL:00000020 <MultPushPop>:
-; DIS-NEXT: 20: e92d4030
-; DIS-NEXT: 24: e24dd004
-; DIS-NEXT: 28: e1a04000
-; DIS-NEXT: 2c: e1a05001
-; DIS-NEXT: 30: ebfffffe
-; DIS-NEXT: 34: e0844005
-; DIS-NEXT: 38: e1a00004
-; DIS-NEXT: 3c: e28dd004
-; DIS-NEXT: 40: e8bd4030
-; DIS-NEXT: 44: e12fff1e
+; ASM: pop {r4, r5, lr}
+; DIS: {{.+}} e8bd4030
+; IASM-NOT: pop
-; IASM-LABEL:MultPushPop:
-; IASM-NEXT:.LMultPushPop$__0:
-; IASM-NEXT: .byte 0x30
-; IASM-NEXT: .byte 0x40
-; IASM-NEXT: .byte 0x2d
-; IASM-NEXT: .byte 0xe9
-
-; IASM-NEXT: .byte 0x4
-; IASM-NEXT: .byte 0xd0
-; IASM-NEXT: .byte 0x4d
-; IASM-NEXT: .byte 0xe2
-
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0x40
-; IASM-NEXT: .byte 0xa0
-; IASM-NEXT: .byte 0xe1
-
-; IASM-NEXT: .byte 0x1
-; IASM-NEXT: .byte 0x50
-; IASM-NEXT: .byte 0xa0
-; IASM-NEXT: .byte 0xe1
-
-; IASM-NEXT: bl DoSomething @ .word ebfffffe
-; IASM-NEXT: .byte 0x5
-; IASM-NEXT: .byte 0x40
-; IASM-NEXT: .byte 0x84
-; IASM-NEXT: .byte 0xe0
-
-; IASM-NEXT: .byte 0x4
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0xa0
-; IASM-NEXT: .byte 0xe1
-
-; IASM-NEXT: .byte 0x4
-; IASM-NEXT: .byte 0xd0
-; IASM-NEXT: .byte 0x8d
-; IASM-NEXT: .byte 0xe2
-
-; IASM-NEXT: .byte 0x30
-; IASM-NEXT: .byte 0x40
-; IASM-NEXT: .byte 0xbd
-; IASM-NEXT: .byte 0xe8
-
-; IASM: .byte 0x1e
-; IASM-NEXT: .byte 0xff
-; IASM-NEXT: .byte 0x2f
-; IASM-NEXT: .byte 0xe1
+}
; REQUIRES: allow_dump, target_ARM32
; RUN: %p2i -i %s --sandbox --filetype=asm --target=arm32 --assemble \
; RUN: --disassemble --args -Om1 -allow-externally-defined-symbols \
-; RUN: -ffunction-sections | FileCheck %s
+; RUN: -ffunction-sections -reg-use r0,r1,r3 | FileCheck %s
declare void @call_target()
declare void @call_target1(i32 %arg0)
define internal void @test_direct_call() {
entry:
call void @call_target()
+  ; bundle aligned.
+
+ call void @call_target()
+ ret void
+}
+
+; CHECK-LABEL:<test_direct_call>:
+; Search for bundle alignment of first call.
+; CHECK: {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK-NEXT: movw [[REG:r[0-9]]], {{.+}} call_target
+; CHECK-NEXT: movt [[REG]], {{.+}} call_target
+; CHECK-NEXT: bic [[REG]], [[REG]], {{.+}} ; 0xc000000f
+; CHECK-NEXT: blx [[REG]]
+; CHECK-NEXT: {{[0-9a-f]*}}0:
+
+; Same as above, but force bundle padding by adding three (branch) instruction
+; before the tested call.
+define internal void @test_direct_call_with_padding_1() {
+entry:
+ call void @call_target()
+  ; bundle aligned.
+
+ br label %next1 ; add 1 inst.
+next1:
+ br label %next2 ; add 1 inst.
+next2:
+ br label %next3 ; add 1 inst.
+next3:
+ call void @call_target()
+ ret void
+}
+; CHECK-LABEL:<test_direct_call_with_padding_1>:
+; Search for bundle alignment of first call.
+; CHECK: {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK-NEXT: b
+; CHECK-NEXT: b
+; CHECK-NEXT: b
+; CHECK-NEXT: movw [[REG:r[0-9]]], {{.+}} call_target
+; CHECK-NEXT: movt [[REG]], {{.+}} call_target
+; CHECK-NEXT: nop
+; CHECK-NEXT: bic [[REG]], [[REG]], {{.+}} ; 0xc000000f
+; CHECK-NEXT: blx r0
+; CHECK-NEXT: {{[0-9a-f]*}}0:
+
+; Same as above, but force bundle padding by adding two (branch) instruction
+; before the tested call.
+define internal void @test_direct_call_with_padding_2() {
+entry:
+ call void @call_target()
+  ; bundle aligned.
+
+ br label %next1 ; add 1 inst.
+next1:
+ br label %next2 ; add 1 inst.
+next2:
+ call void @call_target()
ret void
}
-; CHECK-LABEL: test_direct_call
-; CHECK: sub sp,
-; CHECK-NEXT: bic sp, sp, {{.*}} ; 0xc0000000
-; CHECK: {{[0-9]*}}c: {{.*}} bl {{.*}} call_target
-; CHECK-NEXT: {{[0-9]*}}0:
+
+; CHECK-LABEL:<test_direct_call_with_padding_2>:
+; Search for bundle alignment of first call.
+; CHECK: {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK-NEXT: b
+; CHECK-NEXT: b
+; CHECK-NEXT: movw [[REG:r[0-9]]], {{.+}} call_target
+; CHECK-NEXT: movt [[REG]], {{.+}} call_target
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: bic [[REG]], [[REG]], {{.+}} ; 0xc000000f
+; CHECK-NEXT: blx r0
+; CHECK-NEXT: {{[0-9a-f]*}}0:
+
+; Same as above, but force bundle padding by adding single (branch) instruction
+; before the tested call.
+define internal void @test_direct_call_with_padding_3() {
+entry:
+ call void @call_target()
+  ; bundle aligned.
+
+ br label %next ; add 1 inst.
+next:
+ call void @call_target()
+ ret void
+}
+
+; CHECK-LABEL:<test_direct_call_with_padding_3>:
+; Search for bundle alignment of first call.
+; CHECK: {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK-NEXT: b
+; CHECK-NEXT: movw [[REG:r[0-9]]], {{.+}} call_target
+; CHECK-NEXT: movt [[REG]], {{.+}} call_target
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: bic [[REG]], [[REG]], {{.+}} ; 0xc000000f
+; CHECK-NEXT: blx r0
+; CHECK-NEXT: {{[0-9a-f]*}}0:
; An indirect call sequence uses the right mask and register-call sequence.
define internal void @test_indirect_call(i32 %target) {
entry:
%__1 = inttoptr i32 %target to void ()*
- call void %__1()
+ call void @call_target();
+  ; bundle aligned.
+
+ br label %next ; add 1 inst.
+next:
+ call void %__1() ; requires 3 insts.
ret void
}
-; CHECK-LABEL: test_indirect_call
-; CHECK: sub sp,
-; CHECK: bic sp, sp, {{.*}} ; 0xc0000000
-; CHECK-NOT: bic sp, sp, {{.*}} ; 0xc0000000
-; CHECK: ldr [[REG:r[0-9]+]], [sp,
+
+; CHECK-LABEL:<test_indirect_call>:
+; Search for bundle alignment of first call.
+; CHECK: {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK-NEXT: b
+; CHECK-NEXT: ldr
+; CHECK-NEXT: bic [[REG:r[0-3]]], [[REG]], {{.*}} 0xc000000f
+; CHECK-NEXT: blx [[REG]]
+; CHECK-NEXT: {{[0-9]+}}0:
+
+; An indirect call sequence uses the right mask and register-call sequence.
+; Forces bundling before the tested call.
+define internal void @test_indirect_call_with_padding_1(i32 %target) {
+entry:
+ %__1 = inttoptr i32 %target to void ()*
+ call void @call_target();
+  ; bundle aligned.
+ call void %__1() ; requires 3 insts.
+ ret void
+}
+
+; CHECK-LABEL: <test_indirect_call_with_padding_1>:
+; Search for bundle alignment of first call.
+; CHECK: {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK-NEXT: ldr
; CHECK-NEXT: nop
-; CHECK: {{[0-9]+}}8: {{.*}} bic [[REG:r[0-9]+]], [[REG]], {{.*}} 0xc000000f
+; CHECK-NEXT: bic [[REG:r[0-3]]], [[REG]], {{.*}} 0xc000000f
; CHECK-NEXT: blx [[REG]]
; CHECk-NEXT: {{[0-9]+}}0:
-; A return sequences uses the right pop / mask / jmp sequence.
-define internal void @test_ret() {
+; An indirect call sequence uses the right mask and register-call sequence.
; Forces bundling by adding three (branch) instructions before the tested call.
+define internal void @test_indirect_call_with_padding_2(i32 %target) {
entry:
+ %__1 = inttoptr i32 %target to void ()*
+ call void @call_target();
+  ; bundle aligned.
+
+ br label %next1 ; add 1 inst.
+next1:
+ br label %next2 ; add 1 inst.
+next2:
+ br label %next3 ; add 1 inst.
+next3:
+ call void %__1() ; requires 3 insts.
ret void
}
-; CHECK-LABEL: test_ret
-; CHECK: 0: {{.*}} bic lr, lr, {{.*}} 0xc000000f
-; CHECK-NEXT: bx lr
-; Bundle lock without padding.
-define internal void @bundle_lock_without_padding() {
+; CHECK-LABEL: <test_indirect_call_with_padding_2>:
+; Search for bundle alignment of first call.
+; CHECK: {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK-NEXT: b
+; CHECK-NEXT: b
+; CHECK-NEXT: b
+; CHECK-NEXT: ldr
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: bic [[REG:r[0-3]]], [[REG]], {{.*}} 0xc000000f
+; CHECK-NEXT: blx [[REG]]
+; CHECK-NEXT: {{[0-9]+}}0:
+
+; An indirect call sequence uses the right mask and register-call sequence.
; Forces bundling by adding two (branch) instructions before the tested call.
+define internal void @test_indirect_call_with_padding_3(i32 %target) {
entry:
- %addr_short = bitcast [2 x i8]* @global_short to i16*
- store i16 0, i16* %addr_short, align 1
+ %__1 = inttoptr i32 %target to void ()*
+ call void @call_target();
+  ; bundle aligned.
+
+ br label %next1 ; add 1 inst
+next1:
+ br label %next2 ; add 1 inst
+next2:
+ call void %__1() ; requires 3 insts.
ret void
}
-; CHECK-LABEL: bundle_lock_without_padding
-; CHECK: 0: {{.*}} movw
-; CHECK-NEXT: movt
-; CHECK-NEXT: mov
+; CHECK-LABEL: <test_indirect_call_with_padding_3>:
+; Search for bundle alignment of first call.
+; CHECK: {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK-NEXT: b
+; CHECK-NEXT: b
+; CHECK-NEXT: ldr
+; CHECK-NEXT: nop
; CHECK-NEXT: nop
-; CHECK-NEXT: bic [[REG:r[0-9]+]], {{.*}} 0xc0000000
-; CHECK-NEXT: strh {{.*}}, {{[[]}}[[REG]]
-; CHECK-NEXT: bic lr, lr, {{.*}} ; 0xc000000f
-; CHECK-NEXT: {{.*}} bx lr
+; CHECK-NEXT: nop
+; CHECK-NEXT: bic [[REG:r[0-3]]], [[REG]], {{.*}} 0xc000000f
+; CHECK-NEXT: blx [[REG]]
+; CHECK-NEXT: {{[0-9]+}}0:
-; Bundle lock with padding.
-define internal void @bundle_lock_with_padding() {
+; A return sequence uses the right pop / mask / jmp sequence.
+define internal void @test_ret() {
entry:
call void @call_target()
- ; bundle boundary
- store i16 0, i16* undef, align 1 ; 3 insts
- store i16 0, i16* undef, align 1 ; 3 insts
- store i16 0, i16* undef, align 1 ; 3 insts
- ; SP adjustment + pop
- ; nop
- ; bundle boundary
+ ; Bundle boundary.
+ br label %next ; add 1 inst.
+next:
+ ret void
+}
+; CHECK-LABEL:<test_ret>:
+; Search for bundle alignment of first call.
+; CHECK: {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK-NEXT: b
+; CHECK-NEXT: add sp, sp
+; CHECK-NEXT: bic sp, sp, {{.+}} ; 0xc0000000
+; CHECK-NEXT: pop {lr}
+; CHECK-NEXT: {{[0-9a-f]*}}0: {{.+}} bic lr, lr, {{.+}} ; 0xc000000f
+; CHECK-NEXT: bx lr
+
+; A return sequence with padding for bundle lock.
+define internal void @test_ret_with_padding() {
+ call void @call_target()
+ ; Bundle boundary.
ret void
}
-; CHECK-LABEL: bundle_lock_with_padding
-; CHECK: 48: {{.*}} pop
+
+; CHECK-LABEL:<test_ret_with_padding>:
+; Search for bundle alignment of first call.
+; CHECK: {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK-NEXT: add sp, sp
+; CHECK-NEXT: bic sp, sp, {{.+}} ; 0xc0000000
+; CHECK-NEXT: pop {lr}
; CHECK-NEXT: nop
-; CHECK-NEXT: bic lr, {{.*}} 0xc000000f
-; CHECK-NEXT: {{.*}} bx lr
+; CHECK-NEXT: {{[0-9a-f]*}}0: {{.+}} bic lr, lr, {{.+}} ; 0xc000000f
+; CHECK-NEXT: bx lr
-; Bundle lock align_to_end without any padding.
-define internal void @bundle_lock_align_to_end_padding_0() {
+; Store without bundle padding.
+define internal void @test_store() {
entry:
call void @call_target()
- ; bundle boundary
- call void @call_target3(i32 1, i32 2, i32 3)
- ; bundle boundary
+ ; Bundle boundary
+ store i16 1, i16* undef, align 1 ; 3 insts + bic.
ret void
}
-; CHECK-LABEL: bundle_lock_align_to_end_padding_0
-; CHECK: c: {{.*}} bl {{.*}} call_target
-; CHECK-NEXT: mov
-; CHECK-NEXT: mov
-; CHECK-NEXT: mov
-; CHECK-NEXT: {{[0-9]+}}c: {{.*}} bl {{.*}} call_target3
-; CHECK-NEXT: add sp
-; CHECK-NEXT: bic sp, {{.*}} 0xc0000000
-; CHECK-NEXT: pop
-; CHECK: {{[0-9]+}}0: {{.*}} bic lr, lr, {{.*}} 0xc000000f
-; CHECK-NEXT: {{.*}} bx lr
-
-; Bundle lock align_to_end with one bunch of padding.
-define internal void @bundle_lock_align_to_end_padding_1() {
+
+; CHECK-LABEL: test_store
+; Search for call at end of bundle.
+; CHECK: {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK-NEXT: mov [[REG:r[0-9]]], #0
+; CHECK-NEXT: mov
+; CHECK-NEXT: bic [[REG]], [[REG]], {{.+}} ; 0xc0000000
+; CHECK-NEXT: strh r{{.+}}[[REG]]
+
+; Store with bundle padding. Force padding by adding a single branch
+; instruction.
+define internal void @test_store_with_padding() {
entry:
call void @call_target()
; bundle boundary
- call void @call_target2(i32 1, i32 2)
- ; bundle boundary
+ br label %next ; add 1 inst.
+next:
+ store i16 0, i16* undef, align 1 ; 3 insts
ret void
}
-; CHECK-LABEL: bundle_lock_align_to_end_padding_1
-; CHECK: {{[0-9]*}}c: {{.*}} bl {{.*}} call_target
-; CHECK-NEXT: mov
-; CHECK-NEXT: mov
+; CHECK-LABEL: test_store_with_padding
+; Search for call at end of bundle.
+; CHECK: {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK-NEXT: b
+; CHECK-NEXT: mov [[REG:r[0-9]]], #0
+; CHECK-NEXT: mov
; CHECK-NEXT: nop
-; CHECK-NEXT: bl {{.*}} call_target2
-; CHECK: {{[0-9]+}}0: {{.*}} bic lr, lr, {{.*}} 0xc000000f
-; CHECK-NEXT: {{.*}} bx lr
+; CHECK-NEXT: bic [[REG]], [[REG]], {{.+}} ; 0xc0000000
+; CHECK-NEXT: strh r{{.+}}[[REG]]
-; Bundle lock align_to_end with two bunches of padding.
-define internal void @bundle_lock_align_to_end_padding_2() {
+
+; Store without bundle padding.
+define internal i32 @test_load() {
entry:
- call void @call_target2(i32 1, i32 2)
- ; bundle boundary
- ret void
+ call void @call_target()
+ ; Bundle boundary
+ %v = load i32, i32* undef, align 1 ; 4 insts, bundling middle 2.
+ ret i32 %v
}
-; CHECK-LABEL: bundle_lock_align_to_end_padding_2
-; CHECK: mov
-; CHECK-NEXT: mov
-; CHECK-NEXT: nop
+
+; CHECK-LABEL: test_load
+; Search for call at end of bundle.
+; CHECK: {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK-NEXT: mov [[REG:r[0-9]]], #0
+; CHECK-NEXT: bic [[REG]], [[REG]], {{.+}} ; 0xc0000000
+; CHECK-NEXT: ldr r{{.+}}[[REG]]
+
+; Store with bundle padding.
+define internal i32 @test_load_with_padding() {
+entry:
+ call void @call_target()
+ ; Bundle boundary
+ br label %next1 ; add 1 inst.
+next1:
+ br label %next2 ; add 1 inst.
+next2:
+ %v = load i32, i32* undef, align 1 ; 4 insts, bundling middle 2.
+ ret i32 %v
+}
+
+; CHECK-LABEL: test_load_with_padding
+; Search for call at end of bundle.
+; CHECK: {{[0-9a-f]*}}c: {{.+}} blx
+; CHECK-NEXT: b
+; CHECK-NEXT: b
+; CHECK-NEXT: mov [[REG:r[0-9]]], #0
; CHECK-NEXT: nop
-; CHECK-NEXT: bl {{.*}} call_target2
+; CHECK-NEXT: bic [[REG]], [[REG]], {{.+}} ; 0xc0000000
+; CHECK-NEXT: ldr r{{.+}}[[REG]]
define internal double @testVpushVpop(double %v1, double %v2) {
; ASM-LABEL: testVpushVpop:
-; DIS-LABEL: 00000000 <testVpushVpop>:
+; DIS-LABEL: {{.+}} <testVpushVpop>:
; ASM: vpush {s18, s19, s20, s21}
-; DIS: 0: ed2d9a04
+; DIS: {{.+}}: ed2d9a04
; IASM-NOT: vpush
call void @foo()
ret double %res
; ASM: vpop {s18, s19, s20, s21}
-; DIS: 28: ecbd9a04
+; DIS: {{.+}}: ecbd9a04
; IASM-NOT: vpopd
}
; RUN: --target x8632 -i %s --args -Om1 -allow-externally-defined-symbols \
; RUN: | %if --need=target_X8632 --command FileCheck --check-prefix=OPTM1 %s
-; TODO(jvoung): Stop skipping unimplemented parts (via --skip-unimplemented)
-; once enough infrastructure is in. Also, switch to --filetype=obj
-; when possible.
; RUN: %if --need=target_ARM32 --need=allow_dump \
-; RUN: --command %p2i --filetype=asm --assemble \
-; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
+; RUN: --command %p2i --filetype=obj --assemble \
+; RUN: --disassemble --target arm32 -i %s --args -O2 \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 --check-prefix ARM32-O2 %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
-; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
-; RUN: -i %s --args -Om1 --skip-unimplemented \
+; RUN: --command %p2i --filetype=obj --assemble --disassemble --target arm32 \
+; RUN: -i %s --args -Om1 \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 --check-prefix ARM32-OM1 %s
; ARM32-LABEL: pass64BitArg
; ARM32: str {{.*}}, [sp]
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} ignore64BitArgNoInline
+; ARM32: movt [[CALL]], {{.+}} ignore64BitArgNoInline
; ARM32: mov r2, #123
-; ARM32: bl {{.*}} ignore64BitArgNoInline
+; ARM32: blx [[CALL]]
; ARM32: str {{.*}}, [sp]
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} ignore64BitArgNoInline
+; ARM32: movt [[CALL]], {{.+}} ignore64BitArgNoInline
; ARM32: {{mov|ldr}} r0
; ARM32: {{mov|ldr}} r1
; ARM32: mov r2, #123
-; ARM32: bl {{.*}} ignore64BitArgNoInline
+; ARM32: blx [[CALL]]
; ARM32: str {{.*}}, [sp]
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} ignore64BitArgNoInline
+; ARM32: movt [[CALL]], {{.+}} ignore64BitArgNoInline
; ARM32: {{mov|ldr}} r0
; ARM32: {{mov|ldr}} r1
; ARM32: mov r2, #123
-; ARM32: bl {{.*}} ignore64BitArgNoInline
+; ARM32: blx [[CALL]]
; MIPS32-LABEL: pass64BitArg
; ARM32: movt [[REG2]], {{.*}} ; 0x1234
; ARM32: str [[REG1]], [sp, #4]
; ARM32: str [[REG2]], [sp]
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} ignore64BitArgNoInline
+; ARM32: movt [[CALL]], {{.+}} ignore64BitArgNoInline
; ARM32: {{mov|ldr}} r0
; ARM32: {{mov|ldr}} r1
; ARM32: mov r2, #123
-; ARM32: bl {{.*}} ignore64BitArgNoInline
+; ARM32: blx [[CALL]]
+
define internal i32 @pass64BitUndefArg() {
entry:
; ARM32: sub sp
; ARM32: mov {{.*}}, #0
; ARM32: str
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} ignore64BitArgNoInline
+; ARM32: movt [[CALL]], {{.+}} ignore64BitArgNoInline
; ARM32: mov {{.*}}, #123
-; ARM32: bl {{.*}} ignore64BitArgNoInline
+; ARM32: blx [[CALL]]
; MIPS32-LABEL: pass64BitUndefArg
; MIPS32: jr ra
; ARM32-LABEL: div64BitSigned
; ARM32: orrs {{r.*}}, {{r.*}}
; ARM32: bne
-; ARM32: bl {{.*}} __divdi3
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} __divdi3
+; ARM32: movt [[CALL]], {{.+}} __divdi3
+; ARM32: blx [[CALL]]
define internal i64 @div64BitSignedConst(i64 %a) {
entry:
; ARM32-LABEL: div64BitSignedConst
; For a constant, we should be able to optimize-out the divide by zero check.
; ARM32-NOT: orrs
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} __divdi3
+; ARM32: movt [[CALL]], {{.+}} __divdi3
; ARM32: movw {{.*}} ; 0x2ff2
; ARM32: movt {{.*}} ; 0x73ce
; ARM32: movw {{.*}} ; 0xb3a
-; ARM32: bl {{.*}} __divdi3
+; ARM32: blx [[CALL]]
define internal i64 @div64BitUnsigned(i64 %a, i64 %b) {
entry:
; ARM32-LABEL: div64BitUnsigned
; ARM32: orrs {{r.*}}, {{r.*}}
; ARM32: bne
-; ARM32: bl {{.*}} __udivdi3
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} __udivdi3
+; ARM32: movt [[CALL]], {{.+}} __udivdi3
+; ARM32: blx [[CALL]]
define internal i64 @rem64BitSigned(i64 %a, i64 %b) {
entry:
; ARM32-LABEL: rem64BitSigned
; ARM32: orrs {{r.*}}, {{r.*}}
; ARM32: bne
-; ARM32: bl {{.*}} __moddi3
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} __moddi3
+; ARM32: movt [[CALL]], {{.+}} __moddi3
+; ARM32: blx [[CALL]]
define internal i64 @rem64BitUnsigned(i64 %a, i64 %b) {
entry:
; ARM32-LABEL: rem64BitUnsigned
; ARM32: orrs {{r.*}}, {{r.*}}
; ARM32: bne
-; ARM32: bl {{.*}} __umoddi3
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} __umoddi3
+; ARM32: movt [[CALL]], {{.+}} __umoddi3
+; ARM32: blx [[CALL]]
define internal i64 @shl64BitSigned(i64 %a, i64 %b) {
entry:
; ARM32: cmpeq
; ARM32-OM1: tst
; ARM32: bne
-; ARM32: bl {{.*}} <func>
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} func
+; ARM32: movt [[CALL]], {{.+}} func
+; ARM32: blx [[CALL]]
; ARM32: cmp
; ARM32: cmpeq
; ARM32-OM1: tst
; ARM32: bne
-; ARM32: bl {{.*}} <func>
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} func
+; ARM32: movt [[CALL]], {{.+}} func
+; ARM32: blx [[CALL]]
; ARM32: bx
declare void @func()
; ARM32-OM1: tst
; ARM32-OM1: bne
; ARM32-O2: beq
-; ARM32: bl {{.*}} <func>
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} func
+; ARM32: movt [[CALL]], {{.+}} func
+; ARM32: blx [[CALL]]
; ARM32: cmp
; ARM32: cmpeq
; ARM32-OM1: tst
; RUN: | %if --need=target_X8632 --command FileCheck \
; RUN: --check-prefix CHECK-OPTM1 %s
-; TODO(jvoung): Stop skipping unimplemented parts (via --skip-unimplemented)
-; once enough infrastructure is in. Also, switch to --filetype=obj
-; when possible.
; RUN: %if --need=target_ARM32 --need=allow_dump \
-; RUN: --command %p2i --filetype=asm --assemble \
-; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
+; RUN: --command %p2i --filetype=obj --assemble \
+; RUN: --disassemble --target arm32 -i %s --args -O2 \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 --check-prefix=ARM-OPT2 %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
-; RUN: --command %p2i --filetype=asm --assemble \
-; RUN: --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \
+; RUN: --command %p2i --filetype=obj --assemble \
+; RUN: --disassemble --target arm32 -i %s --args -Om1 \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 --check-prefix=ARM-OPTM1 %s
; ARM32-LABEL: fixed_416_align_16
; ARM32-OPT2: sub sp, sp, #428
; ARM32-OPTM1: sub sp, sp, #416
-; ARM32: bl {{.*}} R_{{.*}} f1
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} f1
+; ARM32: movt [[CALL]], {{.+}} f1
+; ARM32: blx [[CALL]]
define internal void @fixed_416_align_32(i32 %n) {
entry:
; ARM32-OPT2: sub sp, sp, #424
; ARM32-OPTM1: sub sp, sp, #416
; ARM32: bic sp, sp, #31
-; ARM32: bl {{.*}} R_{{.*}} f1
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} f1
+; ARM32: movt [[CALL]], {{.+}} f1
+; ARM32: blx [[CALL]]
; Show that the amount to allocate will be rounded up.
define internal void @fixed_351_align_16(i32 %n) {
; ARM32-LABEL: fixed_351_align_16
; ARM32-OPT2: sub sp, sp, #364
; ARM32-OPTM1: sub sp, sp, #352
-; ARM32: bl {{.*}} R_{{.*}} f1
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} f1
+; ARM32: movt [[CALL]], {{.+}} f1
+; ARM32: blx [[CALL]]
define internal void @fixed_351_align_32(i32 %n) {
entry:
; ARM32-OPT2: sub sp, sp, #360
; ARM32-OPTM1: sub sp, sp, #352
; ARM32: bic sp, sp, #31
-; ARM32: bl {{.*}} R_{{.*}} f1
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} f1
+; ARM32: movt [[CALL]], {{.+}} f1
+; ARM32: blx [[CALL]]
declare void @f1(i32 %ignored)
; ARM32: add r0, r0, #15
; ARM32: bic r0, r0, #15
; ARM32: sub sp, sp, r0
-; ARM32: bl {{.*}} R_{{.*}} f2
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} f2
+; ARM32: movt [[CALL]], {{.+}} f2
+; ARM32: blx [[CALL]]
define internal void @variable_n_align_32(i32 %n) {
entry:
; ARM32: add r0, r0, #31
; ARM32: bic r0, r0, #31
; ARM32: sub sp, sp, r0
-; ARM32: bl {{.*}} R_{{.*}} f2
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} f2
+; ARM32: movt [[CALL]], {{.+}} f2
+; ARM32: blx [[CALL]]
; ARM32: mov sp, fp
; ARM32: pop {fp, lr}
; RUN: --target x8632 -i %s --args -O2 \
; RUN: | %if --need=target_X8632 --command FileCheck %s
-; TODO(jvoung): Stop skipping unimplemented parts (via --skip-unimplemented)
-; once enough infrastructure is in. Also, switch to --filetype=obj
-; when possible.
; RUN: %if --need=target_ARM32 --need=allow_dump \
-; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
-; RUN: -i %s --args -O2 --skip-unimplemented \
+; RUN: --command %p2i --filetype=obj --assemble --disassemble --target arm32 \
+; RUN: -i %s --args -O2 \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 --check-prefix ARM-OPT2 %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
-; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
-; RUN: -i %s --args -O2 --mattr=hwdiv-arm --skip-unimplemented \
+; RUN: --command %p2i --filetype=obj --assemble --disassemble --target arm32 \
+; RUN: -i %s --args -O2 --mattr=hwdiv-arm \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32HWDIV %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
-; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
-; RUN: -i %s --args -Om1 --skip-unimplemented \
+; RUN: --command %p2i --filetype=obj --assemble --disassemble --target arm32 \
+; RUN: -i %s --args -Om1 \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 --check-prefix ARM32-OPTM1 %s
;
+; TODO(kschimpf): Stop skipping unimplemented parts (via --skip-unimplemented)
+; once enough infrastructure is in. Also, switch to --filetype=obj
+; when possible.
; RUN: %if --need=target_MIPS32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble --disassemble --target mips32\
-; RUN: -i %s --args -O2 --skip-unimplemented \
+; RUN: -i %s --args -O2 -skip-unimplemented \
; RUN: | %if --need=target_MIPS32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix MIPS32 %s
; ARM32-LABEL: Sdiv
; ARM32: tst [[DENOM:r.*]], [[DENOM]]
; ARM32: bne
-; ARM32: .word 0xe7fedef0
-; ARM32: {{.*}} bl {{.*}} __divsi3
+; AARM32: .word 0xe7fedef0
+; ARM32: udf #60896 ; 0xede0
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} __divsi3
+; ARM32: movt [[CALL]], {{.+}} __divsi3
+; ARM32: blx [[CALL]]
; ARM32HWDIV-LABEL: Sdiv
; ARM32HWDIV: tst
; ARM32HWDIV: bne
;
; ARM32-LABEL: SdivConst
; ARM32-NOT: tst
-; ARM32: bl {{.*}} __divsi3
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} __divsi3
+; ARM32: movt [[CALL]], {{.+}} __divsi3
+; ARM32: blx [[CALL]]
; ARM32HWDIV-LABEL: SdivConst
; ARM32HWDIV-NOT: tst
; ARM32HWDIV: sdiv
; ARM32-LABEL: Srem
; ARM32: tst [[DENOM:r.*]], [[DENOM]]
; ARM32: bne
-; ARM32: bl {{.*}} __modsi3
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} __modsi3
+; ARM32: movt [[CALL]], {{.+}} __modsi3
+; ARM32: blx [[CALL]]
; ARM32HWDIV-LABEL: Srem
; ARM32HWDIV: tst
; ARM32HWDIV: bne
; ARM32-LABEL: Udiv
; ARM32: tst [[DENOM:r.*]], [[DENOM]]
; ARM32: bne
-; ARM32: bl {{.*}} __udivsi3
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} __udivsi3
+; ARM32: movt [[CALL]], {{.+}} __udivsi3
+; ARM32: blx [[CALL]]
; ARM32HWDIV-LABEL: Udiv
; ARM32HWDIV: tst
; ARM32HWDIV: bne
; ARM32-LABEL: Urem
; ARM32: tst [[DENOM:r.*]], [[DENOM]]
; ARM32: bne
-; ARM32: bl {{.*}} __umodsi3
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} __umodsi3
+; ARM32: movt [[CALL]], {{.+}} __umodsi3
+; ARM32: blx [[CALL]]
; ARM32HWDIV-LABEL: Urem
; ARM32HWDIV: tst
; ARM32HWDIV: bne
; CHECK: jge
; ARM32-LABEL: fold_cmp_br_intervening_insts
; ARM32: push {{[{].*[}]}}
-; ARM32: bl use_value
+; ARM32: movw [[CALL:r[0-9]]], #:lower16:use_value
+; ARM32: movt [[CALL]], #:upper16:use_value
+; ARM32: blx [[CALL]]
; ARM32: cmp {{r[0-9]+}}, {{r[0-9]+}}
; ARM32: bge
; ARM32: mov r0, #1
; CHECK: cmp
; CHECK: cmovl
; ARM32-LABEL: fold_cmp_select_intervening_insts
-; ARM32: bl use_value
+; ARM32: movw [[CALL:r[0-9]]], #:lower16:use_value
+; ARM32: movt [[CALL]], #:upper16:use_value
+; ARM32: blx [[CALL]]
; ARM32: cmp r{{[0-9]+}}, r{{[0-9]+}}
; ARM32: movlt
; ARM32: bx lr
; RUN: --target x8632 -i %s --args -Om1 -allow-externally-defined-symbols \
; RUN: | %if --need=target_X8632 --command FileCheck --check-prefix=OM1 %s
-; TODO(jvoung): Stop skipping unimplemented parts (via --skip-unimplemented)
-; once enough infrastructure is in. Also, switch to --filetype=obj
-; when possible.
; RUN: %if --need=target_ARM32 --need=allow_dump \
-; RUN: --command %p2i --filetype=asm --assemble \
+; RUN: --command %p2i --filetype=obj --assemble \
; RUN: --disassemble --target arm32 -i %s --args -O2 \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32O2 %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
-; RUN: --command %p2i --filetype=asm --assemble \
+; RUN: --command %p2i --filetype=obj --assemble \
; RUN: --disassemble --target arm32 -i %s --args -Om1 \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; OM1: call
; ARM32O2-LABEL: testUncondToNextBlock
-; ARM32O2: bl {{.*}} dummy
-; ARM32O2-NEXT: bl {{.*}} dummy
+; ARM32O2: movw {{.+}} dummy
+; ARM32O2-NEXT: movt
+; ARM32O2-NEXT: blx
+; ARM32O2-NEXT: movw {{.+}} dummy
+; ARM32O2-NEXT: movt
+; ARM32O2-NEXT: blx
; ARM32OM1-LABEL: testUncondToNextBlock
-; ARM32OM1: bl {{.*}} dummy
+; ARM32OM1: movw {{.+}} dummy
+; ARM32OM1-NEXT: movt
+; ARM32OM1-NEXT: blx
; ARM32OM1-NEXT: b
-; ARM32OM1-NEXT: bl {{.*}} dummy
+; ARM32OM1-NEXT: movw {{.+}} dummy
+; ARM32OM1-NEXT: movt
+; ARM32OM1-NEXT: blx
+
; For a conditional branch with a fallthrough to the next block, the
; fallthrough branch should be removed.
; OM1: ret
; ARM32O2-LABEL: testCondFallthroughToNextBlock
-; ARM32O2: cmp {{.*}}, #123
+; ARM32O2: cmp {{.*}}, #123
; ARM32O2-NEXT: bge
-; ARM32O2-NEXT: bl
-; ARM32O2: bx lr
-; ARM32O2: bl
-; ARM32O2: bx lr
+; ARM32O2-NEXT: movw {{.+}} dummy
+; ARM32O2-NEXT: movt
+; ARM32O2-NEXT: blx
+; ARM32O2: bx lr
+; ARM32O2-NEXT: movw {{.+}} dummy
+; ARM32O2-NEXT: movt
+; ARM32O2-NEXT: blx
+; ARM32O2: bx lr
; ARM32OM1-LABEL: testCondFallthroughToNextBlock
; ARM32OM1: mov {{.*}}, #0
; ARM32OM1: tst {{.*}}, #1
; ARM32OM1: bne
; ARM32OM1: b
-; ARM32OM1: bl
+; ARM32OM1: movw
+; ARM32OM1: movt
+; ARM32OM1: blx
; ARM32OM1: bx lr
-; ARM32OM1: bl
+; ARM32OM1: movw
+; ARM32OM1: movt
+; ARM32OM1: blx
; ARM32OM1: bx lr
; For a conditional branch with the next block as the target and a
; Note that compare and branch folding isn't implemented yet
; (compared to x86-32).
; ARM32O2-LABEL: testCondTargetNextBlock
-; ARM32O2: cmp {{.*}}, #123
+; ARM32O2: cmp {{.*}}, #123
; ARM32O2-NEXT: blt
-; ARM32O2-NEXT: bl
-; ARM32O2: bx lr
-; ARM32O2: bl
-; ARM32O2: bx lr
+; ARM32O2-NEXT: movw
+; ARM32O2-NEXT: movt
+; ARM32O2-NEXT: blx
+; ARM32O2: bx lr
+; ARM32O2-NEXT: movw
+; ARM32O2-NEXT: movt
+; ARM32O2-NEXT: blx
+; ARM32O2: bx lr
; ARM32OM1-LABEL: testCondTargetNextBlock
; ARM32OM1: cmp {{.*}}, #123
; ARM32OM1: tst {{.*}}, #1
; ARM32OM1: bne
; ARM32OM1: b
-; ARM32OM1: bl
+; ARM32OM1: blx
; ARM32OM1: bx lr
-; ARM32OM1: bl
+; ARM32OM1: blx
; ARM32OM1: bx lr
; Unconditional branches to the block after a contracted block should be
; OM1: call
; ARM32O2-LABEL: testUncondToBlockAfterContract
-; ARM32O2: bl {{.*}} dummy
-; ARM32O2-NEXT: bl {{.*}} dummy
+; ARM32O2: movw {{.+}} dummy
+; ARM32O2-NEXT: movt
+; ARM32O2-NEXT: blx
+; ARM32O2-NEXT: movw {{.+}} dummy
+; ARM32O2-NEXT: movt
+; ARM32O2-NEXT: blx
; ARM32OM1-LABEL: testUncondToBlockAfterContract
-; ARM32OM1: bl {{.*}} dummy
+; ARM32OM1: movw {{.+}} dummy
+; ARM32OM1-NEXT: movt
+; ARM32OM1-NEXT: blx
; ARM32OM1-NEXT: b
-; ARM32OM1-NEXT: bl {{.*}} dummy
+; ARM32OM1-NEXT: movw {{.+}} dummy
+; ARM32OM1-NEXT: movt
+; ARM32OM1-NEXT: blx
; CHECK-LABEL: remFloat
; CHECK: call {{.*}} R_{{.*}} fmodf
; ARM32-LABEL: remFloat
-; ARM32: bl {{.*}} fmodf
+; ARM32: movw {{.+}} fmodf
+; ARM32: movt
+; ARM32: blx
define internal double @remDouble(double %a, double %b) {
entry:
; CHECK-LABEL: remDouble
; CHECK: call {{.*}} R_{{.*}} fmod
; ARM32-LABEL: remDouble
-; ARM32: bl {{.*}} fmod
+; ARM32: movw {{.+}} fmod
+; ARM32: movt
+; ARM32: blx
; CHECK-LABEL: floatHarness
call void @float1(float 1.0)
; CHECK-DAG: vmov.f32 s0
-; CHECK: bl {{.*}} float1
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float1
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @float2(float 1.0, float 2.0)
; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vmov.f32 s1
-; CHECK: bl {{.*}} float2
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float2
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @float3(float 1.0, float 2.0, float 3.0)
; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vmov.f32 s1
; CHECK-DAG: vmov.f32 s2
-; CHECK: bl {{.*}} float3
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float3
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @float4(float 1.0, float 2.0, float 3.0, float 4.0)
; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vmov.f32 s1
; CHECK-DAG: vmov.f32 s2
; CHECK-DAG: vmov.f32 s3
-; CHECK: bl {{.*}} float4
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float4
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @float5(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0)
; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vmov.f32 s1
; CHECK-DAG: vmov.f32 s2
; CHECK-DAG: vmov.f32 s3
; CHECK-DAG: vmov.f32 s4
-; CHECK: bl {{.*}} float5
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float5
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @float6(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0)
; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vmov.f32 s3
; CHECK-DAG: vmov.f32 s4
; CHECK-DAG: vmov.f32 s5
-; CHECK: bl {{.*}} float6
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float6
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @float7(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0)
; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vmov.f32 s4
; CHECK-DAG: vmov.f32 s5
; CHECK-DAG: vmov.f32 s6
-; CHECK: bl {{.*}} float7
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float7
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @float8(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0, float 8.0)
; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vmov.f32 s5
; CHECK-DAG: vmov.f32 s6
; CHECK-DAG: vmov.f32 s7
-; CHECK: bl {{.*}} float8
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float8
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @float9(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0, float 8.0, float 9.0)
; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vmov.f32 s6
; CHECK-DAG: vmov.f32 s7
; CHECK-DAG: vmov.f32 s8
-; CHECK: bl {{.*}} float9
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float9
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @float10(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0, float 8.0, float 9.0, float 10.0)
; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vmov.f32 s7
; CHECK-DAG: vmov.f32 s8
; CHECK-DAG: vmov.f32 s9
-; CHECK: bl {{.*}} float10
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float10
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @float11(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
float 11.0)
; CHECK-DAG: vmov.f32 s8
; CHECK-DAG: vmov.f32 s9
; CHECK-DAG: vmov.f32 s10
-; CHECK: bl {{.*}} float11
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float11
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @float12(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
float 11.0, float 12.0)
; CHECK-DAG: vmov.f32 s9
; CHECK-DAG: vmov.f32 s10
; CHECK-DAG: vmov.f32 s11
-; CHECK: bl {{.*}} float12
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float12
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @float13(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
float 11.0, float 12.0, float 13.0)
; CHECK-DAG: vmov.f32 s10
; CHECK-DAG: vmov.f32 s11
; CHECK-DAG: vmov.f32 s12
-; CHECK: bl {{.*}} float13
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float13
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @float14(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
float 11.0, float 12.0, float 13.0, float 14.0)
; CHECK-DAG: vmov.f32 s11
; CHECK-DAG: vmov.f32 s12
; CHECK-DAG: vmov.f32 s13
-; CHECK: bl {{.*}} float14
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float14
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @float15(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
float 11.0, float 12.0, float 13.0, float 14.0,
; CHECK-DAG: vmov.f32 s12
; CHECK-DAG: vmov.f32 s13
; CHECK-DAG: vmov.f32 s14
-; CHECK: bl {{.*}} float15
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float15
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @float16(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
float 11.0, float 12.0, float 13.0, float 14.0,
; CHECK-DAG: vmov.f32 s13
; CHECK-DAG: vmov.f32 s14
; CHECK-DAG: vmov.f32 s15
-; CHECK: bl {{.*}} float16
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float16
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @float17(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
float 11.0, float 12.0, float 13.0, float 14.0,
; CHECK-DAG: vmov.f32 s14
; CHECK-DAG: vmov.f32 s15
; CHECK-DAG: vstr s{{.*}}, [sp]
-; CHECK: bl {{.*}} float17
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float17
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @float18(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0, float 8.0, float 9.0, float 10.0,
float 11.0, float 12.0, float 13.0, float 14.0,
; CHECK-DAG: vmov.f32 s15
; CHECK-DAG: vstr s{{.*}}, [sp]
; CHECK-DAG: vstr s{{.*}}, [sp, #4]
-; CHECK: bl {{.*}} float18
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} float18
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
ret void
}
; CHECK-LABEL: doubleHarness
call void @double1(double 1.0)
; CHECK-DAG: vmov.f64 d0
-; CHECK: bl {{.*}} double1
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} double1
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @double2(double 1.0, double 2.0)
; CHECK-DAG: vmov.f64 d0
; CHECK-DAG: vmov.f64 d1
-; CHECK: bl {{.*}} double2
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} double2
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @double3(double 1.0, double 2.0, double 3.0)
; CHECK-DAG: vmov.f64 d0
; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vmov.f64 d2
-; CHECK: bl {{.*}} double3
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} double3
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @double4(double 1.0, double 2.0, double 3.0, double 4.0)
; CHECK-DAG: vmov.f64 d0
; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vmov.f64 d2
; CHECK-DAG: vmov.f64 d3
-; CHECK: bl {{.*}} double4
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} double4
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @double5(double 1.0, double 2.0, double 3.0, double 4.0,
double 5.0)
; CHECK-DAG: vmov.f64 d0
; CHECK-DAG: vmov.f64 d2
; CHECK-DAG: vmov.f64 d3
; CHECK-DAG: vmov.f64 d4
-; CHECK: bl {{.*}} double5
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} double5
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @double6(double 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0)
; CHECK-DAG: vmov.f64 d0
; CHECK-DAG: vmov.f64 d3
; CHECK-DAG: vmov.f64 d4
; CHECK-DAG: vmov.f64 d5
-; CHECK: bl {{.*}} double6
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} double6
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @double7(double 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, double 7.0)
; CHECK-DAG: vmov.f64 d0
; CHECK-DAG: vmov.f64 d4
; CHECK-DAG: vmov.f64 d5
; CHECK-DAG: vmov.f64 d6
-; CHECK: bl {{.*}} double7
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} double7
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @double8(double 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, double 7.0, double 8.0)
; CHECK-DAG: vmov.f64 d0
; CHECK-DAG: vmov.f64 d5
; CHECK-DAG: vmov.f64 d6
; CHECK-DAG: vmov.f64 d7
-; CHECK: bl {{.*}} double8
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} double8
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @double9(double 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, double 7.0, double 8.0,
double 9.0)
; CHECK-DAG: vmov.f64 d5
; CHECK-DAG: vmov.f64 d6
; CHECK-DAG: vmov.f64 d7
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} double9
+; CHECK-DAG: movt [[CALL]]
; CHECK-DAG: vstr d{{.*}}, [sp]
-; CHECK: bl {{.*}} double9
+; CHECK: blx [[CALL]]
call void @double10(double 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, double 7.0, double 8.0,
double 9.0, double 10.0)
; CHECK-DAG: vmov.f64 d7
; CHECK-DAG: vstr d{{.*}}, [sp]
; CHECK-DAG: vstr d{{.*}}, [sp, #8]
-; CHECK: bl {{.*}} double10
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} double10
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
ret void
}
; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vmov.f32 s1
-; CHECK: bl {{.*}} testFDF
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDF
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @testFDDF(float 1.0, double 2.0, double 3.0, float 4.0)
; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vmov.f64 d1
; CHECK-DAG: vmov.f64 d2
; CHECK-DAG: vmov.f32 s1
-; CHECK: bl {{.*}} testFDDF
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDDF
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @testFDDDF(float 1.0, double 2.0, double 3.0, double 4.0,
float 5.0)
; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vmov.f64 d2
; CHECK-DAG: vmov.f64 d3
; CHECK-DAG: vmov.f32 s1
-; CHECK: bl {{.*}} testFDDDF
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDDDF
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @testFDDDDF(float 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, float 6.0)
; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vmov.f64 d3
; CHECK-DAG: vmov.f64 d4
; CHECK-DAG: vmov.f32 s1
-; CHECK: bl {{.*}} testFDDDDF
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDDDDF
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @testFDDDDDF(float 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, float 7.0)
; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vmov.f64 d4
; CHECK-DAG: vmov.f64 d5
; CHECK-DAG: vmov.f32 s1
-; CHECK: bl {{.*}} testFDDDDDF
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDDDDDF
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @testFDDDDDDF(float 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, double 7.0, float 8.0)
; CHECK-DAG: vmov.f32 s0
; CHECK-DAG: vmov.f64 d5
; CHECK-DAG: vmov.f64 d6
; CHECK-DAG: vmov.f32 s1
-; CHECK: bl {{.*}} testFDDDDDDF
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDDDDDDF
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @testFDDDDDDDF(float 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, double 7.0, double 8.0,
float 9.0)
; CHECK-DAG: vmov.f64 d6
; CHECK-DAG: vmov.f64 d7
; CHECK-DAG: vmov.f32 s1
-; CHECK: bl {{.*}} testFDDDDDDDF
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDDDDDDDF
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @testFDDDDDDDFD(float 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, double 7.0, double 8.0,
float 9.0, double 10.0)
; CHECK-DAG: vmov.f64 d7
; CHECK-DAG: vstr d{{.*}}, [sp]
; CHECK-DAG: vmov.f32 s1
-; CHECK: bl {{.*}} testFDDDDDDDFD
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDDDDDDDFD
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @testFDDDDDDDDF(float 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, double 7.0, double 8.0,
double 9.0, float 10.0)
; CHECK-DAG: vmov.f64 d7
; CHECK-DAG: vstr d{{.*}}, [sp]
; CHECK-DAG: vstr s{{.*}}, [sp, #8]
-; CHECK: bl {{.*}} testFDDDDDDDDF
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDDDDDDDDF
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @testFDDDDDDDDDF(float 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, double 7.0, double 8.0,
double 9.0, double 10.0, float 11.0)
; CHECK-DAG: vstr d{{.*}}, [sp]
; CHECK-DAG: vstr d{{.*}}, [sp, #8]
; CHECK-DAG: vstr s{{.*}}, [sp, #16]
-; CHECK: bl {{.*}} testFDDDDDDDDDF
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDDDDDDDDDF
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @testFDDDDDDDDFD(float 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, double 7.0, double 8.0,
double 9.0, float 10.0, double 11.0)
; CHECK-DAG: vstr d{{.*}}, [sp]
; CHECK-DAG: vstr s{{.*}}, [sp, #8]
; CHECK-DAG: vstr d{{.*}}, [sp, #16]
-; CHECK: bl {{.*}} testFDDDDDDDDFD
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDDDDDDDDFD
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
call void @testFDDDDDDDDFDF(float 1.0, double 2.0, double 3.0, double 4.0,
double 5.0, double 6.0, double 7.0, double 8.0,
double 9.0, float 10.0, double 11.0, float 12.0)
; CHECK-DAG: vstr s{{.*}}, [sp, #8]
; CHECK-DAG: vstr d{{.*}}, [sp, #16]
; CHECK-DAG: vstr s{{.*}}, [sp, #24]
-; CHECK: bl {{.*}} testFDDDDDDDDFD
+; CHECK-DAG: movw [[CALL:r[0-9]]], {{.+}} testFDDDDDDDDFDF
+; CHECK-DAG: movt [[CALL]]
+; CHECK: blx [[CALL]]
ret void
}
; ARM32-OM1: mov [[R0:r[0-9]+]], #0
; ARM32-OM1: moveq [[R0]], #1
; ARM32-O2: bne
-; ARM32: bl func
+; ARM32: movw [[CALL:r[0-9]]], #:lower16:func
+; ARM32: movt [[CALL]], #:upper16:func
+; ARM32: blx [[CALL]]
; ARM32: vcmp.f64
; ARM32: vmrs
; ARM32-OM1: mov [[R1:r[0-9]+]], #0
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
-; RUN: -i %s --args -Om1 --skip-unimplemented --test-stack-extra 4096 \
+; RUN: -i %s --args -Om1 --test-stack-extra 4096 \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
; Now skip ahead to where the call in br_1 begins, to check how %t2 is used.
; ARM32: movw ip, #4232
; ARM32-NEXT: add ip, sp, ip
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} dummy
+; ARM32: movt [[CALL]]
; ARM32: ldr r2, [ip, #-4]
-; ARM32: bl {{.*}} dummy
+; ARM32: blx [[CALL]]
; The call clobbers ip, so we need to re-create the base register.
; ARM32: movw ip, #4{{.*}}
; ARM32: b {{[a-f0-9]+}}
-; ARM32: bl {{.*}} dummy
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} dummy
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
; Similar, but test a function that uses FP as the base register (originally).
define internal i64 @usesFrameReg(i32 %a, i32 %b, i32 %c, i32 %d) {
; Now skip ahead to where the call in br_1 begins, to check how %t2 is used.
; ARM32: movw ip, #4120
; ARM32-NEXT: sub ip, fp, ip
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} dummy
+; ARM32: movt [[CALL]]
; ARM32: ldr r2, [ip, #-4]
-; ARM32: bl {{.*}} dummy
+; ARM32: blx [[CALL]]
; The call clobbers ip, so we need to re-create the base register.
; ARM32: movw ip, #4{{.*}}
; ARM32: b {{[a-f0-9]+}}
-; ARM32: bl {{.*}} dummy
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} dummy
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
; RUN: | %if --need=target_X8632 --command FileCheck --check-prefix OM1 %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
-; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \
-; RUN: -i %s --args -O2 --skip-unimplemented \
+; RUN: --command %p2i --filetype=obj --assemble --disassemble --target arm32 \
+; RUN: -i %s --args -O2 \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
; OM1-LABEL: test_memcpy
; OM1: call {{.*}} memcpy
; ARM32-LABEL: test_memcpy
-; ARM32: bl {{.*}} memcpy
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memcpy
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memcpy_long_const_len(i32 %iptr_dst, i32 %iptr_src) {
entry:
; OM1-LABEL: test_memcpy_long_const_len
; OM1: call {{.*}} memcpy
; ARM32-LABEL: test_memcpy_long_const_len
-; ARM32: bl {{.*}} memcpy
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memcpy
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memcpy_very_small_const_len(i32 %iptr_dst,
i32 %iptr_src) {
; OM1-LABEL: test_memcpy_very_small_const_len
; OM1: call {{.*}} memcpy
; ARM32-LABEL: test_memcpy_very_small_const_len
-; ARM32: bl {{.*}} memcpy
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memcpy
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memcpy_const_len_3(i32 %iptr_dst, i32 %iptr_src) {
entry:
; OM1-LABEL: test_memcpy_const_len_3
; OM1: call {{.*}} memcpy
; ARM32-LABEL: test_memcpy_const_len_3
-; ARM32: bl {{.*}} memcpy
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memcpy
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memcpy_mid_const_len(i32 %iptr_dst, i32 %iptr_src) {
entry:
; OM1-LABEL: test_memcpy_mid_const_len
; OM1: call {{.*}} memcpy
; ARM32-LABEL: test_memcpy_mid_const_len
-; ARM32: bl {{.*}} memcpy
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memcpy
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memcpy_mid_const_len_overlap(i32 %iptr_dst,
i32 %iptr_src) {
; OM1-LABEL: test_memcpy_mid_const_len_overlap
; OM1: call {{.*}} memcpy
; ARM32-LABEL: test_memcpy_mid_const_len_overlap
-; ARM32: bl {{.*}} memcpy
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memcpy
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memcpy_big_const_len_overlap(i32 %iptr_dst,
i32 %iptr_src) {
; OM1-LABEL: test_memcpy_big_const_len_overlap
; OM1: call {{.*}} memcpy
; ARM32-LABEL: test_memcpy_big_const_len_overlap
-; ARM32: bl {{.*}} memcpy
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memcpy
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memcpy_large_const_len(i32 %iptr_dst,
i32 %iptr_src) {
; OM1-LABEL: test_memcpy_large_const_len
; OM1: call {{.*}} memcpy
; ARM32-LABEL: test_memcpy_large_const_len
-; ARM32: bl {{.*}} memcpy
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memcpy
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memmove(i32 %iptr_dst, i32 %iptr_src, i32 %len) {
entry:
; OM1-LABEL: test_memmove
; OM1: call {{.*}} memmove
; ARM32-LABEL: test_memmove
-; ARM32: bl {{.*}} memmove
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memmove
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memmove_long_const_len(i32 %iptr_dst,
i32 %iptr_src) {
; OM1-LABEL: test_memmove_long_const_len
; OM1: call {{.*}} memmove
; ARM32-LABEL: test_memmove_long_const_len
-; ARM32: bl {{.*}} memmove
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memmove
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memmove_very_small_const_len(i32 %iptr_dst,
i32 %iptr_src) {
; OM1-LABEL: test_memmove_very_small_const_len
; OM1: call {{.*}} memmove
; ARM32-LABEL: test_memmove_very_small_const_len
-; ARM32: bl {{.*}} memmove
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memmove
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memmove_const_len_3(i32 %iptr_dst, i32 %iptr_src) {
entry:
; OM1-LABEL: test_memmove_const_len_3
; OM1: call {{.*}} memmove
; ARM32-LABEL: test_memmove_const_len_3
-; ARM32: bl {{.*}} memmove
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memmove
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memmove_mid_const_len(i32 %iptr_dst, i32 %iptr_src) {
entry:
; OM1-LABEL: test_memmove_mid_const_len
; OM1: call {{.*}} memmove
; ARM32-LABEL: test_memmove_mid_const_len
-; ARM32: bl {{.*}} memmove
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memmove
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memmove_mid_const_len_overlap(i32 %iptr_dst,
i32 %iptr_src) {
; OM1-LABEL: test_memmove_mid_const_len_overlap
; OM1: call {{.*}} memmove
; ARM32-LABEL: test_memmove_mid_const_len_overlap
-; ARM32: bl {{.*}} memmove
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memmove
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memmove_big_const_len_overlap(i32 %iptr_dst,
i32 %iptr_src) {
; OM1-LABEL: test_memmove_big_const_len_overlap
; OM1: call {{.*}} memmove
; ARM32-LABEL: test_memmove_big_const_len_overlap
-; ARM32: bl {{.*}} memmove
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memmove
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memmove_large_const_len(i32 %iptr_dst,
i32 %iptr_src) {
; OM1-LABEL: test_memmove_large_const_len
; OM1: call {{.*}} memmove
; ARM32-LABEL: test_memmove_large_const_len
-; ARM32: bl {{.*}} memmove
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memmove
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memset(i32 %iptr_dst, i32 %wide_val, i32 %len) {
entry:
; OM1: call {{.*}} R_{{.*}} memset
; ARM32-LABEL: test_memset
; ARM32: uxtb
-; ARM32: bl {{.*}} memset
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memset
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memset_const_len_align(i32 %iptr_dst,
i32 %wide_val) {
; OM1: call {{.*}} R_{{.*}} memset
; ARM32-LABEL: test_memset_const_len_align
; ARM32: uxtb
-; ARM32: bl {{.*}} memset
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memset
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memset_long_const_len_zero_val_align(
i32 %iptr_dst) {
; OM1: call {{.*}} R_{{.*}} memset
; ARM32-LABEL: test_memset_long_const_len_zero_val_align
; ARM32: uxtb
-; ARM32: bl {{.*}} memset
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memset
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memset_const_val(i32 %iptr_dst, i32 %len) {
entry:
; OM1: call {{.*}} R_{{.*}} memset
; ARM32-LABEL: test_memset_const_val
; ARM32: uxtb
-; ARM32: bl {{.*}} memset
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memset
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memset_const_val_len_very_small(i32 %iptr_dst) {
entry:
; OM1: call {{.*}} R_{{.*}} memset
; ARM32-LABEL: test_memset_const_val_len_very_small
; ARM32: uxtb
-; ARM32: bl {{.*}} memset
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memset
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memset_const_val_len_3(i32 %iptr_dst) {
entry:
; OM1: call {{.*}} R_{{.*}} memset
; ARM32-LABEL: test_memset_const_val_len_3
; ARM32: uxtb
-; ARM32: bl {{.*}} memset
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memset
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memset_const_val_len_mid(i32 %iptr_dst) {
entry:
; OM1: call {{.*}} R_{{.*}} memset
; ARM32-LABEL: test_memset_const_val_len_mid
; ARM32: uxtb
-; ARM32: bl {{.*}} memset
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memset
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memset_zero_const_len_small(i32 %iptr_dst) {
entry:
; OM1: call {{.*}} R_{{.*}} memset
; ARM32-LABEL: test_memset_zero_const_len_small
; ARM32: uxtb
-; ARM32: bl {{.*}} memset
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memset
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memset_zero_const_len_small_overlap(i32 %iptr_dst) {
entry:
; OM1: call {{.*}} R_{{.*}} memset
; ARM32-LABEL: test_memset_zero_const_len_small_overlap
; ARM32: uxtb
-; ARM32: bl {{.*}} memset
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memset
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memset_zero_const_len_big_overlap(i32 %iptr_dst) {
entry:
; OM1: call {{.*}} R_{{.*}} memset
; ARM32-LABEL: test_memset_zero_const_len_big_overlap
; ARM32: uxtb
-; ARM32: bl {{.*}} memset
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memset
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal void @test_memset_zero_const_len_large(i32 %iptr_dst) {
entry:
; OM1: call {{.*}} R_{{.*}} memset
; ARM32-LABEL: test_memset_zero_const_len_large
; ARM32: uxtb
-; ARM32: bl {{.*}} memset
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} memset
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
; CHECKO2REM: call {{.*}} R_{{.*}} setjmp
; CHECKO2REM: call {{.*}} R_{{.*}} longjmp
; ARM32-LABEL: test_setjmplongjmp
-; ARM32: bl {{.*}} setjmp
-; ARM32: bl {{.*}} longjmp
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} setjmp
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} longjmp
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal i32 @test_setjmp_unused(i32 %iptr_env, i32 %i_other) {
entry:
; CHECK-LABEL: test_popcount_32
; CHECK: call {{.*}} R_{{.*}} __popcountsi2
; ARM32-LABEL: test_popcount_32
-; ARM32: bl {{.*}} __popcountsi2
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} __popcountsi2
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
define internal i64 @test_popcount_64(i64 %x) {
entry:
; the return value just in case.
; CHECK: mov {{.*}},0x0
; ARM32-LABEL: test_popcount_64
-; ARM32: bl {{.*}} __popcountdi2
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} __popcountdi2
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
; ARM32: mov {{.*}}, #0
define internal i32 @test_popcount_64_ret_i32(i64 %x) {
; CHECK: ret
; ARM32-LABEL: testSelect
; ARM32: cmp
-; ARM32: bl {{.*}} useInt
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} useInt
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
; ARM32-Om1: mov {{.*}}, #20
; ARM32-O2: mov [[REG:r[0-9]+]], #20
; ARM32: tst
; ARM32-Om1: movne {{.*}}, #10
; ARM32-O2: movne [[REG]], #10
-; ARM32: bl {{.*}} useInt
-; ARM32: bl {{.*}} useInt
-; ARM32: bl {{.*}} useInt
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} useInt
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} useInt
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} useInt
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
; ARM32: bx lr
; Check for valid addressing mode in the cmp instruction when the
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble \
-; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
+; RUN: --disassemble --target arm32 -i %s --args -O2 \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble \
-; RUN: --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \
+; RUN: --disassemble --target arm32 -i %s --args -Om1 \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
; ARM32-LABEL: divide
; ARM32: tst
; ARM32: .word 0xe7fedef0
-; ARM32: bl {{.*}} __divsi3
+; ARM32: movw [[CALL:r[0-9]]], {{.+}} __divsi3
+; ARM32: movt [[CALL]]
+; ARM32: blx [[CALL]]
; ARM32: bx lr