From 4417e07e393ea639e05235a29b7faf090ec34d6d Mon Sep 17 00:00:00 2001
From: Tim Northover
Date: Mon, 31 Mar 2014 15:46:30 +0000
Subject: [PATCH] ARM64: shuffle patterns around for fmin/fmax & add tests.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205205 91177308-0d34-0410-b5e6-96231b3b80d8
---
Review notes (free-form area; not part of the commit message):

 * The four v2f32 across-lanes patterns (fmaxnmv, fmaxv, fminnmv, fminv)
   move up next to their v2f64 siblings and now select the scalar pairwise
   instructions (FMAXNMPv2i32p, FMAXPv2i32p, FMINNMPv2i32p, FMINPv2i32p)
   directly. The removed patterns selected the vector pairwise form with
   the input passed twice and then took the ssub sub-register.
 * The new test expects "f{min,max}[nm]p s0, v0.2s" for v2f32,
   "f{min,max}[nm]v s0, v0.4s" for v4f32 and "f{min,max}[nm]p d0, v0.2d"
   for v2f64.
 * Function anchors in the test use CHECK-LABEL so that each function's
   checks are confined to that function's output.
 * NOTE(review): this copy of the patch looks like it lost some text in
   transit (the From: header has no address, and the context line
   "multiclass SIMDAcrossLanesSignedIntrinsic {" has no parameter list);
   continuation-line indentation was also re-created by hand. Confirm the
   context lines against the tree before applying.

 lib/Target/ARM64/ARM64InstrInfo.td |  16 +++---
 test/CodeGen/ARM64/fminv.ll        | 101 +++++++++++++++++++++++++++++++++++++
 2 files changed, 109 insertions(+), 8 deletions(-)
 create mode 100644 test/CodeGen/ARM64/fminv.ll

diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/ARM64/ARM64InstrInfo.td
index 4804f9af638..1b47fb576d2 100644
--- a/lib/Target/ARM64/ARM64InstrInfo.td
+++ b/lib/Target/ARM64/ARM64InstrInfo.td
@@ -2857,12 +2857,20 @@ def : Pat<(f32 (int_arm64_neon_faddv (v4f32 V128:$Rn))),
           (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
 def : Pat<(f64 (int_arm64_neon_faddv (v2f64 V128:$Rn))),
           (FADDPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_arm64_neon_fmaxnmv (v2f32 V64:$Rn))),
+          (FMAXNMPv2i32p V64:$Rn)>;
 def : Pat<(f64 (int_arm64_neon_fmaxnmv (v2f64 V128:$Rn))),
           (FMAXNMPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_arm64_neon_fmaxv (v2f32 V64:$Rn))),
+          (FMAXPv2i32p V64:$Rn)>;
 def : Pat<(f64 (int_arm64_neon_fmaxv (v2f64 V128:$Rn))),
           (FMAXPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_arm64_neon_fminnmv (v2f32 V64:$Rn))),
+          (FMINNMPv2i32p V64:$Rn)>;
 def : Pat<(f64 (int_arm64_neon_fminnmv (v2f64 V128:$Rn))),
           (FMINNMPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_arm64_neon_fminv (v2f32 V64:$Rn))),
+          (FMINPv2i32p V64:$Rn)>;
 def : Pat<(f64 (int_arm64_neon_fminv (v2f64 V128:$Rn))),
           (FMINPv2i64p V128:$Rn)>;
 
@@ -3072,17 +3080,9 @@ defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
 defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
 defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
 defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv", int_arm64_neon_fmaxnmv>;
-def : Pat<(f32 (int_arm64_neon_fmaxnmv (v2f32 V64:$Rn))),
-          (EXTRACT_SUBREG (FMAXNMPv2f32 V64:$Rn, V64:$Rn), ssub)>;
 defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_arm64_neon_fmaxv>;
-def : Pat<(f32 (int_arm64_neon_fmaxv (v2f32 V64:$Rn))),
-          (EXTRACT_SUBREG (FMAXPv2f32 V64:$Rn, V64:$Rn), ssub)>;
 defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_arm64_neon_fminnmv>;
-def : Pat<(f32 (int_arm64_neon_fminnmv (v2f32 V64:$Rn))),
-          (EXTRACT_SUBREG (FMINNMPv2f32 V64:$Rn, V64:$Rn), ssub)>;
 defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_arm64_neon_fminv>;
-def : Pat<(f32 (int_arm64_neon_fminv (v2f32 V64:$Rn))),
-          (EXTRACT_SUBREG (FMINPv2f32 V64:$Rn, V64:$Rn), ssub)>;
 
 multiclass SIMDAcrossLanesSignedIntrinsic {
 // If there is a sign extension after this intrinsic, consume it as smov already
diff --git a/test/CodeGen/ARM64/fminv.ll b/test/CodeGen/ARM64/fminv.ll
new file mode 100644
index 00000000000..ca706d897ca
--- /dev/null
+++ b/test/CodeGen/ARM64/fminv.ll
@@ -0,0 +1,101 @@
+; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s
+
+define float @test_fminv_v2f32(<2 x float> %in) {
+; CHECK-LABEL: test_fminv_v2f32:
+; CHECK: fminp s0, v0.2s
+  %min = call float @llvm.arm64.neon.fminv.f32.v2f32(<2 x float> %in)
+  ret float %min
+}
+
+define float @test_fminv_v4f32(<4 x float> %in) {
+; CHECK-LABEL: test_fminv_v4f32:
+; CHECK: fminv s0, v0.4s
+  %min = call float @llvm.arm64.neon.fminv.f32.v4f32(<4 x float> %in)
+  ret float %min
+}
+
+define double @test_fminv_v2f64(<2 x double> %in) {
+; CHECK-LABEL: test_fminv_v2f64:
+; CHECK: fminp d0, v0.2d
+  %min = call double @llvm.arm64.neon.fminv.f64.v2f64(<2 x double> %in)
+  ret double %min
+}
+
+declare float @llvm.arm64.neon.fminv.f32.v2f32(<2 x float>)
+declare float @llvm.arm64.neon.fminv.f32.v4f32(<4 x float>)
+declare double @llvm.arm64.neon.fminv.f64.v2f64(<2 x double>)
+
+define float @test_fmaxv_v2f32(<2 x float> %in) {
+; CHECK-LABEL: test_fmaxv_v2f32:
+; CHECK: fmaxp s0, v0.2s
+  %max = call float @llvm.arm64.neon.fmaxv.f32.v2f32(<2 x float> %in)
+  ret float %max
+}
+
+define float @test_fmaxv_v4f32(<4 x float> %in) {
+; CHECK-LABEL: test_fmaxv_v4f32:
+; CHECK: fmaxv s0, v0.4s
+  %max = call float @llvm.arm64.neon.fmaxv.f32.v4f32(<4 x float> %in)
+  ret float %max
+}
+
+define double @test_fmaxv_v2f64(<2 x double> %in) {
+; CHECK-LABEL: test_fmaxv_v2f64:
+; CHECK: fmaxp d0, v0.2d
+  %max = call double @llvm.arm64.neon.fmaxv.f64.v2f64(<2 x double> %in)
+  ret double %max
+}
+
+declare float @llvm.arm64.neon.fmaxv.f32.v2f32(<2 x float>)
+declare float @llvm.arm64.neon.fmaxv.f32.v4f32(<4 x float>)
+declare double @llvm.arm64.neon.fmaxv.f64.v2f64(<2 x double>)
+
+define float @test_fminnmv_v2f32(<2 x float> %in) {
+; CHECK-LABEL: test_fminnmv_v2f32:
+; CHECK: fminnmp s0, v0.2s
+  %minnm = call float @llvm.arm64.neon.fminnmv.f32.v2f32(<2 x float> %in)
+  ret float %minnm
+}
+
+define float @test_fminnmv_v4f32(<4 x float> %in) {
+; CHECK-LABEL: test_fminnmv_v4f32:
+; CHECK: fminnmv s0, v0.4s
+  %minnm = call float @llvm.arm64.neon.fminnmv.f32.v4f32(<4 x float> %in)
+  ret float %minnm
+}
+
+define double @test_fminnmv_v2f64(<2 x double> %in) {
+; CHECK-LABEL: test_fminnmv_v2f64:
+; CHECK: fminnmp d0, v0.2d
+  %minnm = call double @llvm.arm64.neon.fminnmv.f64.v2f64(<2 x double> %in)
+  ret double %minnm
+}
+
+declare float @llvm.arm64.neon.fminnmv.f32.v2f32(<2 x float>)
+declare float @llvm.arm64.neon.fminnmv.f32.v4f32(<4 x float>)
+declare double @llvm.arm64.neon.fminnmv.f64.v2f64(<2 x double>)
+
+define float @test_fmaxnmv_v2f32(<2 x float> %in) {
+; CHECK-LABEL: test_fmaxnmv_v2f32:
+; CHECK: fmaxnmp s0, v0.2s
+  %maxnm = call float @llvm.arm64.neon.fmaxnmv.f32.v2f32(<2 x float> %in)
+  ret float %maxnm
+}
+
+define float @test_fmaxnmv_v4f32(<4 x float> %in) {
+; CHECK-LABEL: test_fmaxnmv_v4f32:
+; CHECK: fmaxnmv s0, v0.4s
+  %maxnm = call float @llvm.arm64.neon.fmaxnmv.f32.v4f32(<4 x float> %in)
+  ret float %maxnm
+}
+
+define double @test_fmaxnmv_v2f64(<2 x double> %in) {
+; CHECK-LABEL: test_fmaxnmv_v2f64:
+; CHECK: fmaxnmp d0, v0.2d
+  %maxnm = call double @llvm.arm64.neon.fmaxnmv.f64.v2f64(<2 x double> %in)
+  ret double %maxnm
+}
+
+declare float @llvm.arm64.neon.fmaxnmv.f32.v2f32(<2 x float>)
+declare float @llvm.arm64.neon.fmaxnmv.f32.v4f32(<4 x float>)
+declare double @llvm.arm64.neon.fmaxnmv.f64.v2f64(<2 x double>)
-- 
2.11.0