From b32894764b8608996b18d364252661d9d08c0e78 Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Mon, 9 Nov 2015 11:03:18 +0000 Subject: [PATCH] [CodeGen] Always promote f16 if not legal We don't currently have any runtime library functions for operations on f16 values (other than conversions to and from f32 and f64), so we should always promote it to f32, even if that is not a legal type. In that case, the f32 values would be softened to f32 library calls. SoftenFloatRes_FP_EXTEND now needs to check the promoted operand's type, as it may be a no-op or require a different library call. getCopyFromParts and getCopyToParts now need to cope with a floating-point value stored in a larger integer part, as is the case for any target that needs to store an f16 value in a 32-bit integer register. Differential Revision: http://reviews.llvm.org/D12856 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@252459 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 9 + lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 14 + lib/CodeGen/TargetLoweringBase.cpp | 20 +- test/CodeGen/ARM/fp16-promote.ll | 314 +++++++++++------------ 4 files changed, 180 insertions(+), 177 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 56a79612f60..97e88bf84a7 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -418,6 +418,15 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SoftenFloatResult(Op.getNode(), 0); } + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) { + Op = GetPromotedFloat(Op); + // If the promotion did the FP_EXTEND to the destination type for us, + // there's nothing left to do here. 
+ if (Op.getValueType() == N->getValueType(0)) { + return BitConvertToInteger(Op); + } + } + RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftenFloat) Op = GetSoftenedFloat(Op); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 2f5e329c95b..60b06b4ab0e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -198,6 +198,14 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, if (PartEVT == ValueVT) return Val; + if (PartEVT.isInteger() && ValueVT.isFloatingPoint() && + ValueVT.bitsLT(PartEVT)) { + // For an FP value in an integer part, we need to truncate to the right + // width first. + PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val); + } + if (PartEVT.isInteger() && ValueVT.isInteger()) { if (ValueVT.bitsLT(PartEVT)) { // For a truncate, see if we have any information to @@ -384,6 +392,12 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, assert(NumParts == 1 && "Do not know what to promote to!"); Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); } else { + if (ValueVT.isFloatingPoint()) { + // FP values need to be bitcast, then extended if they are being put + // into a larger container. 
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + } assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && ValueVT.isInteger() && "Unknown mismatch!"); diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 24bdef3040e..69eec888745 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -1277,20 +1277,14 @@ void TargetLoweringBase::computeRegisterProperties( ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); } + // Decide how to handle f16. If the target does not have native f16 support, + // promote it to f32, because there are no f16 library calls (except for + // conversions). if (!isTypeLegal(MVT::f16)) { - // If the target has native f32 support, promote f16 operations to f32. If - // f32 is not supported, generate soft float library calls. - if (isTypeLegal(MVT::f32)) { - NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32]; - RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32]; - TransformToType[MVT::f16] = MVT::f32; - ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat); - } else { - NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16]; - RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16]; - TransformToType[MVT::f16] = MVT::i16; - ValueTypeActions.setTypeAction(MVT::f16, TypeSoftenFloat); - } + NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32]; + RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32]; + TransformToType[MVT::f16] = MVT::f32; + ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat); } // Loop over all of the vector value types to see which need transformations. 
diff --git a/test/CodeGen/ARM/fp16-promote.ll b/test/CodeGen/ARM/fp16-promote.ll index 0352e5e2a79..2a2eb8d2b6b 100644 --- a/test/CodeGen/ARM/fp16-promote.ll +++ b/test/CodeGen/ARM/fp16-promote.ll @@ -1,18 +1,18 @@ -; RUN: llc -asm-verbose=false < %s -mattr=+vfp3,+fp16 | FileCheck %s -check-prefix=CHECK-FP16 -check-prefix=CHECK-ALL -; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=CHECK-LIBCALL -check-prefix=CHECK-ALL +; RUN: llc -asm-verbose=false < %s -mattr=+vfp3,+fp16 | FileCheck %s -check-prefix=CHECK-FP16 --check-prefix=CHECK-VFP -check-prefix=CHECK-ALL +; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=CHECK-LIBCALL --check-prefix=CHECK-VFP -check-prefix=CHECK-ALL --check-prefix=CHECK-LIBCALL-VFP +; RUN: llc -asm-verbose=false < %s -mattr=-vfp2 | FileCheck %s --check-prefix=CHECK-LIBCALL -check-prefix=CHECK-NOVFP -check-prefix=CHECK-ALL target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32" target triple = "armv7---eabihf" -; CHECK-FP16-LABEL: test_fadd: +; CHECK-ALL-LABEL: test_fadd: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: vadd.f32 -; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL-LABEL: test_fadd: ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vadd.f32 +; CHECK-VFP: vadd.f32 +; CHECK-NOVFP: bl __aeabi_fadd +; CHECK-FP16: vcvtb.f16.f32 ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_fadd(half* %p, half* %q) #0 { %a = load half, half* %p, align 2 @@ -22,15 +22,14 @@ define void @test_fadd(half* %p, half* %q) #0 { ret void } -; CHECK-FP16-LABEL: test_fsub: +; CHECK-ALL-LABEL: test_fsub: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: vsub.f32 -; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL-LABEL: test_fsub: ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vsub.f32 +; CHECK-VFP: vsub.f32 +; CHECK-NOVFP: bl __aeabi_fsub +; CHECK-FP16: 
vcvtb.f16.f32 ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_fsub(half* %p, half* %q) #0 { %a = load half, half* %p, align 2 @@ -40,15 +39,14 @@ define void @test_fsub(half* %p, half* %q) #0 { ret void } -; CHECK-FP16-LABEL: test_fmul: +; CHECK-ALL-LABEL: test_fmul: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: vmul.f32 -; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL-LABEL: test_fmul ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vmul.f32 +; CHECK-VFP: vmul.f32 +; CHECK-NOVFP: bl __aeabi_fmul +; CHECK-FP16: vcvtb.f16.f32 ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_fmul(half* %p, half* %q) #0 { %a = load half, half* %p, align 2 @@ -58,15 +56,14 @@ define void @test_fmul(half* %p, half* %q) #0 { ret void } -; CHECK-FP16-LABEL: test_fdiv: +; CHECK-ALL-LABEL: test_fdiv: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: vdiv.f32 -; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL-LABEL: test_fdiv ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vdiv.f32 +; CHECK-VFP: vdiv.f32 +; CHECK-NOVFP: bl __aeabi_fdiv +; CHECK-FP16: vcvtb.f16.f32 ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_fdiv(half* %p, half* %q) #0 { %a = load half, half* %p, align 2 @@ -76,15 +73,13 @@ define void @test_fdiv(half* %p, half* %q) #0 { ret void } -; CHECK-FP16-LABEL: test_frem: +; CHECK-ALL-LABEL: test_frem: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: bl fmodf -; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL-LABEL: test_frem ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl fmodf +; CHECK-FP16: vcvtb.f16.f32 ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_frem(half* %p, half* %q) #0 { %a = load half, half* %p, align 2 @@ -96,9 +91,8 @@ define void @test_frem(half* %p, half* %q) #0 { ; CHECK-ALL-LABEL: test_load_store: ; CHECK-ALL-NEXT: .fnstart -; CHECK-ALL-NEXT: ldrh r0, [r0] -; CHECK-ALL-NEXT: strh r0, [r1] -; 
CHECK-ALL-NEXT: bx lr +; CHECK-ALL: ldrh {{r[0-9]+}}, [{{r[0-9]+}}] +; CHECK-ALL: strh {{r[0-9]+}}, [{{r[0-9]+}}] define void @test_load_store(half* %p, half* %q) #0 { %a = load half, half* %p, align 2 store half %a, half* %q @@ -125,9 +119,12 @@ define half @test_call(half %a, half %b) #0 { ; CHECK-ALL-NEXT: .fnstart ; CHECK-ALL-NEXT: .save {r11, lr} ; CHECK-ALL-NEXT: push {r11, lr} -; CHECK-ALL-NEXT: vmov.f32 s2, s0 -; CHECK-ALL-NEXT: vmov.f32 s0, s1 -; CHECK-ALL-NEXT: vmov.f32 s1, s2 +; CHECK-VFP-NEXT: vmov.f32 s2, s0 +; CHECK-VFP-NEXT: vmov.f32 s0, s1 +; CHECK-VFP-NEXT: vmov.f32 s1, s2 +; CHECK-NOVFP-NEXT: mov r2, r0 +; CHECK-NOVFP-NEXT: mov r0, r1 +; CHECK-NOVFP-NEXT: mov r1, r2 ; CHECK-ALL-NEXT: bl test_callee ; CHECK-ALL-NEXT: pop {r11, pc} define half @test_call_flipped(half %a, half %b) #0 { @@ -137,9 +134,12 @@ define half @test_call_flipped(half %a, half %b) #0 { ; CHECK-ALL-LABEL: test_tailcall_flipped: ; CHECK-ALL-NEXT: .fnstart -; CHECK-ALL-NEXT: vmov.f32 s2, s0 -; CHECK-ALL-NEXT: vmov.f32 s0, s1 -; CHECK-ALL-NEXT: vmov.f32 s1, s2 +; CHECK-VFP-NEXT: vmov.f32 s2, s0 +; CHECK-VFP-NEXT: vmov.f32 s0, s1 +; CHECK-VFP-NEXT: vmov.f32 s1, s2 +; CHECK-NOVFP-NEXT: mov r2, r0 +; CHECK-NOVFP-NEXT: mov r0, r1 +; CHECK-NOVFP-NEXT: mov r1, r2 ; CHECK-ALL-NEXT: b test_callee define half @test_tailcall_flipped(half %a, half %b) #0 { %r = tail call half @test_callee(half %b, half %a) @@ -149,12 +149,10 @@ define half @test_tailcall_flipped(half %a, half %b) #0 { ; Optimizer picks %p or %q based on %c and only loads that value ; No conversion is needed ; CHECK-ALL-LABEL: test_select: -; CHECK-ALL-NEXT: .fnstart -; CHECK-ALL-NEXT: cmp r2, #0 -; CHECK-ALL-NEXT: movne r1, r0 -; CHECK-ALL-NEXT: ldrh r1, [r1] -; CHECK-ALL-NEXT: strh r1, [r0] -; CHECK-ALL-NEXT: bx lr +; CHECK-ALL: cmp {{r[0-9]+}}, #0 +; CHECK-ALL: movne {{r[0-9]+}}, {{r[0-9]+}} +; CHECK-ALL: ldrh {{r[0-9]+}}, [{{r[0-9]+}}] +; CHECK-ALL: strh {{r[0-9]+}}, [{{r[0-9]+}}] define void @test_select(half* %p, half* 
%q, i1 zeroext %c) #0 { %a = load half, half* %p, align 2 %b = load half, half* %q, align 2 @@ -165,17 +163,15 @@ define void @test_select(half* %p, half* %q, i1 zeroext %c) #0 { ; Test only two variants of fcmp. These get translated to f32 vcmpe ; instructions anyway. -; CHECK-FP16-LABEL: test_fcmp_une: +; CHECK-ALL-LABEL: test_fcmp_une: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: vcmpe.f32 -; CHECK-FP16: vmrs APSR_nzcv, fpscr -; CHECK-FP16: movwne -; CHECK-LIBCALL-LABEL: test_fcmp_une: ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vcmpe.f32 -; CHECK-LIBCALL: movwne +; CHECK-VFP: vcmpe.f32 +; CHECK-NOVFP: bl __aeabi_fcmpeq +; CHECK-FP16: vmrs APSR_nzcv, fpscr +; CHECK-ALL: movw{{ne|eq}} define i1 @test_fcmp_une(half* %p, half* %q) #0 { %a = load half, half* %p, align 2 %b = load half, half* %q, align 2 @@ -183,18 +179,15 @@ define i1 @test_fcmp_une(half* %p, half* %q) #0 { ret i1 %r } -; CHECK-FP16-LABEL: test_fcmp_ueq: +; CHECK-ALL-LABEL: test_fcmp_ueq: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: vcmpe.f32 -; CHECK-FP16: vmrs APSR_nzcv, fpscr -; CHECK-FP16: movweq -; CHECK-FP16: movwvs -; CHECK-LIBCALL-LABEL: test_fcmp_ueq: ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vcmpe.f32 -; CHECK-LIBCALL: movweq +; CHECK-VFP: vcmpe.f32 +; CHECK-NOVFP: bl __aeabi_fcmpeq +; CHECK-FP16: vmrs APSR_nzcv, fpscr +; CHECK-LIBCALL: movw{{ne|eq}} define i1 @test_fcmp_ueq(half* %p, half* %q) #0 { %a = load half, half* %p, align 2 %b = load half, half* %q, align 2 @@ -202,19 +195,18 @@ define i1 @test_fcmp_ueq(half* %p, half* %q) #0 { ret i1 %r } -; CHECK-FP16-LABEL: test_br_cc: +; CHECK-ALL-LABEL: test_br_cc: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: vcmpe.f32 -; CHECK-FP16: vmrs APSR_nzcv, fpscr -; CHECK-FP16: strmi -; CHECK-FP16: strpl -; CHECK-LIBCALL-LABEL: test_br_cc: ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl 
__aeabi_h2f -; CHECK-LIBCALL: vcmpe.f32 -; CHECK-LIBCALL: strmi -; CHECK-LIBCALL: strpl +; CHECK-VFP: vcmpe.f32 +; CHECK-NOVFP: bl __aeabi_fcmplt +; CHECK-FP16: vmrs APSR_nzcv, fpscr +; CHECK-VFP: strmi +; CHECK-VFP: strpl +; CHECK-NOVFP: strne +; CHECK-NOVFP: streq define void @test_br_cc(half* %p, half* %q, i32* %p1, i32* %p2) #0 { %a = load half, half* %p, align 2 %b = load half, half* %q, align 2 @@ -229,20 +221,19 @@ else: } declare i1 @test_dummy(half* %p) #0 -; CHECK-FP16-LABEL: test_phi: +; CHECK-ALL-LABEL: test_phi: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: [[LOOP:.LBB[1-9_]+]]: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: bl test_dummy ; CHECK-FP16: bne [[LOOP]] ; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL-LABEL: test_phi: -; CHECK-LIBCALL: bl __aeabi_h2f +; CHECK-LIBCALL-VFP: bl __aeabi_h2f ; CHECK-LIBCALL: [[LOOP:.LBB[1-9_]+]]: -; CHECK-LIBCALL: bl __aeabi_h2f +; CHECK-LIBCALL-VFP: bl __aeabi_h2f ; CHECK-LIBCALL: bl test_dummy ; CHECK-LIBCALL: bne [[LOOP]] -; CHECK-LIBCALL: bl __aeabi_f2h +; CHECK-LIBCALL-VFP: bl __aeabi_f2h define void @test_phi(half* %p) #0 { entry: %a = load half, half* %p @@ -257,59 +248,52 @@ return: ret void } -; CHECK-FP16-LABEL: test_fptosi_i32: +; CHECK-ALL-LABEL: test_fptosi_i32: ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: vcvt.s32.f32 -; CHECK-LIBCALL-LABEL: test_fptosi_i32: ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vcvt.s32.f32 +; CHECK-VFP: vcvt.s32.f32 +; CHECK-NOVFP: bl __aeabi_f2iz define i32 @test_fptosi_i32(half* %p) #0 { %a = load half, half* %p, align 2 %r = fptosi half %a to i32 ret i32 %r } -; CHECK-FP16-LABEL: test_fptosi_i64: +; CHECK-ALL-LABEL: test_fptosi_i64: ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: bl __aeabi_f2lz -; CHECK-LIBCALL-LABEL: test_fptosi_i64: ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: bl __aeabi_f2lz +; CHECK-ALL: bl __aeabi_f2lz define i64 @test_fptosi_i64(half* %p) #0 { %a = load half, half* %p, align 2 %r = fptosi half %a to i64 ret i64 %r } -; CHECK-FP16-LABEL: test_fptoui_i32: +; 
CHECK-ALL-LABEL: test_fptoui_i32: ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: vcvt.u32.f32 -; CHECK-LIBCALL-LABEL: test_fptoui_i32: ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vcvt.u32.f32 +; CHECK-VFP: vcvt.u32.f32 +; CHECK-NOVFP: bl __aeabi_f2uiz define i32 @test_fptoui_i32(half* %p) #0 { %a = load half, half* %p, align 2 %r = fptoui half %a to i32 ret i32 %r } -; CHECK-FP16-LABEL: test_fptoui_i64: +; CHECK-ALL-LABEL: test_fptoui_i64: ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: bl __aeabi_f2ulz -; CHECK-LIBCALL-LABEL: test_fptoui_i64: ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: bl __aeabi_f2ulz +; CHECK-ALL: bl __aeabi_f2ulz define i64 @test_fptoui_i64(half* %p) #0 { %a = load half, half* %p, align 2 %r = fptoui half %a to i64 ret i64 %r } -; CHECK-FP16-LABEL: test_sitofp_i32: -; CHECK-FP16: vcvt.f32.s32 +; CHECK-ALL-LABEL: test_sitofp_i32: +; CHECK-VFP: vcvt.f32.s32 +; CHECK-NOVFP: bl __aeabi_i2f ; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL-LABEL: test_sitofp_i32: -; CHECK-LIBCALL: vcvt.f32.s32 ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_sitofp_i32(i32 %a, half* %p) #0 { %r = sitofp i32 %a to half @@ -317,11 +301,10 @@ define void @test_sitofp_i32(i32 %a, half* %p) #0 { ret void } -; CHECK-FP16-LABEL: test_uitofp_i32: -; CHECK-FP16: vcvt.f32.u32 +; CHECK-ALL-LABEL: test_uitofp_i32: +; CHECK-VFP: vcvt.f32.u32 +; CHECK-NOVFP: bl __aeabi_ui2f ; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL-LABEL: test_uitofp_i32: -; CHECK-LIBCALL: vcvt.f32.u32 ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_uitofp_i32(i32 %a, half* %p) #0 { %r = uitofp i32 %a to half @@ -329,11 +312,9 @@ define void @test_uitofp_i32(i32 %a, half* %p) #0 { ret void } -; CHECK-FP16-LABEL: test_sitofp_i64: -; CHECK-FP16: bl __aeabi_l2f +; CHECK-ALL-LABEL: test_sitofp_i64: +; CHECK-ALL: bl __aeabi_l2f ; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL-LABEL: test_sitofp_i64: -; CHECK-LIBCALL: bl __aeabi_l2f ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_sitofp_i64(i64 %a, half* %p) #0 { %r 
= sitofp i64 %a to half @@ -341,11 +322,9 @@ define void @test_sitofp_i64(i64 %a, half* %p) #0 { ret void } -; CHECK-FP16-LABEL: test_uitofp_i64: -; CHECK-FP16: bl __aeabi_ul2f +; CHECK-ALL-LABEL: test_uitofp_i64: +; CHECK-ALL: bl __aeabi_ul2f ; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL-LABEL: test_uitofp_i64: -; CHECK-LIBCALL: bl __aeabi_ul2f ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_uitofp_i64(i64 %a, half* %p) #0 { %r = uitofp i64 %a to half @@ -385,10 +364,10 @@ define float @test_fpextend_float(half* %p) { ; CHECK-FP16-LABEL: test_fpextend_double: ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-FP16: vcvt.f64.f32 ; CHECK-LIBCALL-LABEL: test_fpextend_double: ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vcvt.f64.f32 +; CHECK-VFP: vcvt.f64.f32 +; CHECK-NOVFP: bl __aeabi_f2d define double @test_fpextend_double(half* %p) { %a = load half, half* %p, align 2 %r = fpext half %a to double @@ -438,13 +417,13 @@ declare half @llvm.nearbyint.f16(half %a) #0 declare half @llvm.round.f16(half %a) #0 declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0 -; CHECK-FP16-LABEL: test_sqrt: +; CHECK-ALL-LABEL: test_sqrt: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vsqrt.f32 ; CHECK-FP16: vcvtb.f16.f32 -; CHECK-LIBCALL-LABEL: test_sqrt: ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vsqrt.f32 +; CHECK-VFP-LIBCALL: vsqrt.f32 +; CHECK-NOVFP: bl sqrtf ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_sqrt(half* %p) #0 { %a = load half, half* %p, align 2 @@ -671,7 +650,10 @@ define void @test_maxnum(half* %p, half* %q) #0 { ; CHECK-LIBCALL-LABEL: test_copysign: ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vbsl +; CHECK-VFP-LIBCALL: vbsl +; CHECK-NOVFP: bfc +; CHECK-NOVFP: and +; CHECK-NOVFP: orr ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_copysign(half* %p, half* %q) #0 { %a = load half, half* %p, align 2 @@ -781,7 +763,8 @@ define void @test_round(half* %p) { ; CHECK-LIBCALL: bl __aeabi_h2f ; CHECK-LIBCALL: bl __aeabi_h2f ; 
CHECK-LIBCALL: bl __aeabi_h2f -; CHECK-LIBCALL: vmla.f32 +; CHECK-VFP-LIBCALL: vmla.f32 +; CHECK-NOVFP: bl __aeabi_fmul ; CHECK-LIBCALL: bl __aeabi_f2h define void @test_fmuladd(half* %p, half* %q, half* %r) #0 { %a = load half, half* %p, align 2 @@ -797,31 +780,28 @@ define void @test_fmuladd(half* %p, half* %q, half* %r) #0 { ; and extractelement have these extra loads and stores. ; CHECK-ALL-LABEL: test_insertelement: -; CHECK-ALL-NEXT: .fnstart -; CHECK-ALL-NEXT: .pad #8 -; CHECK-ALL-NEXT: sub sp, sp, #8 -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: mov -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: add -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: add sp, sp, #8 -; CHECK-ALL-NEXT: bx lr +; CHECK-ALL: sub sp, sp, #8 +; CHECK-ALL: ldrh +; CHECK-ALL: strh +; CHECK-ALL: ldrh +; CHECK-ALL: strh +; CHECK-ALL: ldrh +; CHECK-ALL: strh +; CHECK-ALL: ldrh +; CHECK-ALL: strh +; CHECK-ALL: mov +; CHECK-ALL-DAG: ldrh +; CHECK-ALL-DAG: add +; CHECK-ALL: strh +; CHECK-ALL: ldrh +; CHECK-ALL: strh +; CHECK-ALL: ldrh +; CHECK-ALL: strh +; CHECK-ALL: ldrh +; CHECK-ALL: strh +; CHECK-ALL: ldrh +; CHECK-ALL: strh +; CHECK-ALL: add sp, sp, #8 define void @test_insertelement(half* %p, <4 x half>* %q, i32 %i) #0 { %a = load half, half* %p, align 2 %b = load <4 x half>, <4 x half>* %q, align 8 @@ -831,23 +811,30 @@ define void @test_insertelement(half* %p, <4 x half>* %q, i32 %i) #0 { } ; CHECK-ALL-LABEL: test_extractelement: -; CHECK-ALL-NEXT: .fnstart -; CHECK-ALL-NEXT: .pad #8 -; CHECK-ALL-NEXT: sub sp, sp, #8 -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: orr -; CHECK-ALL-NEXT: str -; CHECK-ALL-NEXT: ldrh -; 
CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: orr -; CHECK-ALL-NEXT: str -; CHECK-ALL-NEXT: mov -; CHECK-ALL-NEXT: add -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: add sp, sp, #8 -; CHECK-ALL-NEXT: bx lr +; CHECK-VFP: sub sp, sp, #8 +; CHECK-VFP: ldrh +; CHECK-VFP: ldrh +; CHECK-VFP: orr +; CHECK-VFP: str +; CHECK-VFP: ldrh +; CHECK-VFP: ldrh +; CHECK-VFP: orr +; CHECK-VFP: str +; CHECK-VFP: mov +; CHECK-VFP: add +; CHECK-VFP: ldrh +; CHECK-VFP: strh +; CHECK-VFP: add sp, sp, #8 +; CHECK-VFP: bx lr +; CHECK-NOVFP: ldrh +; CHECK-NOVFP: strh +; CHECK-NOVFP: ldrh +; CHECK-NOVFP: strh +; CHECK-NOVFP: ldrh +; CHECK-NOVFP: strh +; CHECK-NOVFP: ldrh +; CHECK-NOVFP: strh +; CHECK-NOVFP: ldrh define void @test_extractelement(half* %p, <4 x half>* %q, i32 %i) #0 { %a = load <4 x half>, <4 x half>* %q, align 8 %b = extractelement <4 x half> %a, i32 %i @@ -860,12 +847,10 @@ define void @test_extractelement(half* %p, <4 x half>* %q, i32 %i) #0 { %struct.dummy = type { i32, half } ; CHECK-ALL-LABEL: test_insertvalue: -; CHECK-ALL-NEXT: .fnstart -; CHECK-ALL-NEXT: ldr -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: str -; CHECK-ALL-NEXT: bx lr +; CHECK-ALL-DAG: ldr +; CHECK-ALL-DAG: ldrh +; CHECK-ALL-DAG: strh +; CHECK-ALL-DAG: str define void @test_insertvalue(%struct.dummy* %p, half* %q) { %a = load %struct.dummy, %struct.dummy* %p %b = load half, half* %q @@ -875,10 +860,9 @@ define void @test_insertvalue(%struct.dummy* %p, half* %q) { } ; CHECK-ALL-LABEL: test_extractvalue: -; CHECK-ALL-NEXT: .fnstart -; CHECK-ALL-NEXT: ldrh -; CHECK-ALL-NEXT: strh -; CHECK-ALL-NEXT: bx lr +; CHECK-ALL: .fnstart +; CHECK-ALL: ldrh +; CHECK-ALL: strh define void @test_extractvalue(%struct.dummy* %p, half* %q) { %a = load %struct.dummy, %struct.dummy* %p %b = extractvalue %struct.dummy %a, 1 @@ -886,10 +870,11 @@ define void @test_extractvalue(%struct.dummy* %p, half* %q) { ret void } -; CHECK-FP16-LABEL: test_struct_return: +; CHECK-ALL-LABEL: 
test_struct_return: ; CHECK-FP16: vcvtb.f32.f16 -; CHECK-LIBCALL-LABEL: test_struct_return: -; CHECK-LIBCALL: bl __aeabi_h2f +; CHECK-VFP-LIBCALL: bl __aeabi_h2f +; CHECK-NOVFP-DAG: ldr +; CHECK-NOVFP-DAG: ldrh define %struct.dummy @test_struct_return(%struct.dummy* %p) { %a = load %struct.dummy, %struct.dummy* %p ret %struct.dummy %a @@ -897,6 +882,7 @@ define %struct.dummy @test_struct_return(%struct.dummy* %p) { ; CHECK-ALL-LABEL: test_struct_arg: ; CHECK-ALL-NEXT: .fnstart +; CHECK-NOVFP-NEXT: mov r0, r1 ; CHECK-ALL-NEXT: bx lr define half @test_struct_arg(%struct.dummy %p) { %a = extractvalue %struct.dummy %p, 1 -- 2.11.0