From 4417e07e393ea639e05235a29b7faf090ec34d6d Mon Sep 17 00:00:00 2001
From: Tim Northover <tnorthover@apple.com>
Date: Mon, 31 Mar 2014 15:46:30 +0000
Subject: [PATCH] ARM64: shuffle patterns around for fmin/fmax & add tests.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205205 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM64/ARM64InstrInfo.td |  16 +++---
 test/CodeGen/ARM64/fminv.ll        | 101 +++++++++++++++++++++++++++++++++++++
 2 files changed, 109 insertions(+), 8 deletions(-)
 create mode 100644 test/CodeGen/ARM64/fminv.ll
diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/ARM64/ARM64InstrInfo.td
index 4804f9af638..1b47fb576d2 100644
--- a/lib/Target/ARM64/ARM64InstrInfo.td
+++ b/lib/Target/ARM64/ARM64InstrInfo.td
@@ -2857,12 +2857,20 @@ def : Pat<(f32 (int_arm64_neon_faddv (v4f32 V128:$Rn))),
           (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
 def : Pat<(f64 (int_arm64_neon_faddv (v2f64 V128:$Rn))),
           (FADDPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_arm64_neon_fmaxnmv (v2f32 V64:$Rn))),
+          (FMAXNMPv2i32p V64:$Rn)>;
 def : Pat<(f64 (int_arm64_neon_fmaxnmv (v2f64 V128:$Rn))),
           (FMAXNMPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_arm64_neon_fmaxv (v2f32 V64:$Rn))),
+          (FMAXPv2i32p V64:$Rn)>;
 def : Pat<(f64 (int_arm64_neon_fmaxv (v2f64 V128:$Rn))),
           (FMAXPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_arm64_neon_fminnmv (v2f32 V64:$Rn))),
+          (FMINNMPv2i32p V64:$Rn)>;
 def : Pat<(f64 (int_arm64_neon_fminnmv (v2f64 V128:$Rn))),
           (FMINNMPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_arm64_neon_fminv (v2f32 V64:$Rn))),
+          (FMINPv2i32p V64:$Rn)>;
 def : Pat<(f64 (int_arm64_neon_fminv (v2f64 V128:$Rn))),
           (FMINPv2i64p V128:$Rn)>;
 
@@ -3072,17 +3080,9 @@ defm UMINV   : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
 defm SADDLV  : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
 defm UADDLV  : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
 defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv", int_arm64_neon_fmaxnmv>;
-def : Pat<(f32 (int_arm64_neon_fmaxnmv (v2f32 V64:$Rn))),
-          (EXTRACT_SUBREG (FMAXNMPv2f32 V64:$Rn, V64:$Rn), ssub)>;
 defm FMAXV   : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_arm64_neon_fmaxv>;
-def : Pat<(f32 (int_arm64_neon_fmaxv (v2f32 V64:$Rn))),
-          (EXTRACT_SUBREG (FMAXPv2f32 V64:$Rn, V64:$Rn), ssub)>;
 defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_arm64_neon_fminnmv>;
-def : Pat<(f32 (int_arm64_neon_fminnmv (v2f32 V64:$Rn))),
-          (EXTRACT_SUBREG (FMINNMPv2f32 V64:$Rn, V64:$Rn), ssub)>;
 defm FMINV   : SIMDAcrossLanesS<0b01111, 1, "fminv", int_arm64_neon_fminv>;
-def : Pat<(f32 (int_arm64_neon_fminv (v2f32 V64:$Rn))),
-          (EXTRACT_SUBREG (FMINPv2f32 V64:$Rn, V64:$Rn), ssub)>;
 
 multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, Intrinsic intOp> {
 // If there is a sign extension after this intrinsic, consume it as smov already
diff --git a/test/CodeGen/ARM64/fminv.ll b/test/CodeGen/ARM64/fminv.ll
new file mode 100644
index 00000000000..ca706d897ca
--- /dev/null
+++ b/test/CodeGen/ARM64/fminv.ll
@@ -0,0 +1,101 @@
+; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s
+
+define float @test_fminv_v2f32(<2 x float> %in) {
+; CHECK-LABEL: test_fminv_v2f32:
+; CHECK: fminp s0, v0.2s
+  %min = call float @llvm.arm64.neon.fminv.f32.v2f32(<2 x float> %in)
+  ret float %min
+}
+
+define float @test_fminv_v4f32(<4 x float> %in) {
+; CHECK-LABEL: test_fminv_v4f32:
+; CHECK: fminv s0, v0.4s
+  %min = call float @llvm.arm64.neon.fminv.f32.v4f32(<4 x float> %in)
+  ret float %min
+}
+
+define double @test_fminv_v2f64(<2 x double> %in) {
+; CHECK-LABEL: test_fminv_v2f64:
+; CHECK: fminp d0, v0.2d
+  %min = call double @llvm.arm64.neon.fminv.f64.v2f64(<2 x double> %in)
+  ret double %min
+}
+
+declare float @llvm.arm64.neon.fminv.f32.v2f32(<2 x float>)
+declare float @llvm.arm64.neon.fminv.f32.v4f32(<4 x float>)
+declare double @llvm.arm64.neon.fminv.f64.v2f64(<2 x double>)
+
+define float @test_fmaxv_v2f32(<2 x float> %in) {
+; CHECK-LABEL: test_fmaxv_v2f32:
+; CHECK: fmaxp s0, v0.2s
+  %max = call float @llvm.arm64.neon.fmaxv.f32.v2f32(<2 x float> %in)
+  ret float %max
+}
+
+define float @test_fmaxv_v4f32(<4 x float> %in) {
+; CHECK-LABEL: test_fmaxv_v4f32:
+; CHECK: fmaxv s0, v0.4s
+  %max = call float @llvm.arm64.neon.fmaxv.f32.v4f32(<4 x float> %in)
+  ret float %max
+}
+
+define double @test_fmaxv_v2f64(<2 x double> %in) {
+; CHECK-LABEL: test_fmaxv_v2f64:
+; CHECK: fmaxp d0, v0.2d
+  %max = call double @llvm.arm64.neon.fmaxv.f64.v2f64(<2 x double> %in)
+  ret double %max
+}
+
+declare float @llvm.arm64.neon.fmaxv.f32.v2f32(<2 x float>)
+declare float @llvm.arm64.neon.fmaxv.f32.v4f32(<4 x float>)
+declare double @llvm.arm64.neon.fmaxv.f64.v2f64(<2 x double>)
+
+define float @test_fminnmv_v2f32(<2 x float> %in) {
+; CHECK-LABEL: test_fminnmv_v2f32:
+; CHECK: fminnmp s0, v0.2s
+  %minnm = call float @llvm.arm64.neon.fminnmv.f32.v2f32(<2 x float> %in)
+  ret float %minnm
+}
+
+define float @test_fminnmv_v4f32(<4 x float> %in) {
+; CHECK-LABEL: test_fminnmv_v4f32:
+; CHECK: fminnmv s0, v0.4s
+  %minnm = call float @llvm.arm64.neon.fminnmv.f32.v4f32(<4 x float> %in)
+  ret float %minnm
+}
+
+define double @test_fminnmv_v2f64(<2 x double> %in) {
+; CHECK-LABEL: test_fminnmv_v2f64:
+; CHECK: fminnmp d0, v0.2d
+  %minnm = call double @llvm.arm64.neon.fminnmv.f64.v2f64(<2 x double> %in)
+  ret double %minnm
+}
+
+declare float @llvm.arm64.neon.fminnmv.f32.v2f32(<2 x float>)
+declare float @llvm.arm64.neon.fminnmv.f32.v4f32(<4 x float>)
+declare double @llvm.arm64.neon.fminnmv.f64.v2f64(<2 x double>)
+
+define float @test_fmaxnmv_v2f32(<2 x float> %in) {
+; CHECK-LABEL: test_fmaxnmv_v2f32:
+; CHECK: fmaxnmp s0, v0.2s
+  %maxnm = call float @llvm.arm64.neon.fmaxnmv.f32.v2f32(<2 x float> %in)
+  ret float %maxnm
+}
+
+define float @test_fmaxnmv_v4f32(<4 x float> %in) {
+; CHECK-LABEL: test_fmaxnmv_v4f32:
+; CHECK: fmaxnmv s0, v0.4s
+  %maxnm = call float @llvm.arm64.neon.fmaxnmv.f32.v4f32(<4 x float> %in)
+  ret float %maxnm
+}
+
+define double @test_fmaxnmv_v2f64(<2 x double> %in) {
+; CHECK-LABEL: test_fmaxnmv_v2f64:
+; CHECK: fmaxnmp d0, v0.2d
+  %maxnm = call double @llvm.arm64.neon.fmaxnmv.f64.v2f64(<2 x double> %in)
+  ret double %maxnm
+}
+
+declare float @llvm.arm64.neon.fmaxnmv.f32.v2f32(<2 x float>)
+declare float @llvm.arm64.neon.fmaxnmv.f32.v4f32(<4 x float>)
+declare double @llvm.arm64.neon.fmaxnmv.f64.v2f64(<2 x double>)
-- 
2.11.0