[AArch64] Fix some failures exposed by the v4f16 and v8f16 value types.

author Jiangning Liu <jiangning.liu@arm.com>

Fri, 29 Aug 2014 01:31:42 +0000 (01:31 +0000)

committer Jiangning Liu <jiangning.liu@arm.com>

Fri, 29 Aug 2014 01:31:42 +0000 (01:31 +0000)
author Jiangning Liu <jiangning.liu@arm.com>
Fri, 29 Aug 2014 01:31:42 +0000 (01:31 +0000)
committer Jiangning Liu <jiangning.liu@arm.com>
Fri, 29 Aug 2014 01:31:42 +0000 (01:31 +0000)
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp

index 60cd784..d3ec172 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -576,13 +576,13 @@ AArch64TargetLowering::AArch64TargetLowering(TargetMachine &TM)
  }
  
  void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
-  if (VT == MVT::v2f32) {
+  if (VT == MVT::v2f32 || VT == MVT::v4f16) {
      setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
      AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i32);
  
      setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
      AddPromotedToType(ISD::STORE, VT.getSimpleVT(), MVT::v2i32);
-  } else if (VT == MVT::v2f64 || VT == MVT::v4f32) {
+  } else if (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16) {
      setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
      AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i64);
  
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td

index 024624f..48f1fc7 100644 (file)
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5233,6 +5233,9 @@ def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))),
  def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))),
                              (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                                              (REV64v8i16 FPR128:$src), (i32 8)))>;
+def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))),
+                            (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
+                                            (REV64v8i16 FPR128:$src), (i32 8)))>;
  def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))),
                              (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
  def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))),
@@ -5247,6 +5250,7 @@ let Predicates = [IsLE] in {
  def : Pat<(v2f64 (bitconvert (f128  FPR128:$src))), (v2f64 FPR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
  }
@@ -5258,6 +5262,8 @@ def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))),
                               (v2f64 (REV64v4i32 FPR128:$src))>;
  def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))),
                               (v2f64 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))),
+                             (v2f64 (REV64v8i16 FPR128:$src))>;
  def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
                               (v2f64 (REV64v16i8 FPR128:$src))>;
  def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))),
@@ -5268,6 +5274,7 @@ def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;
  let Predicates = [IsLE] in {
  def : Pat<(v4f32 (bitconvert (f128  FPR128:$src))), (v4f32 FPR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
@@ -5278,6 +5285,8 @@ def : Pat<(v4f32 (bitconvert (f128  FPR128:$src))),
                                      (REV64v4i32 FPR128:$src), (i32 8)))>;
  def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))),
                               (v4f32 (REV32v8i16 FPR128:$src))>;
+def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))),
+                             (v4f32 (REV32v8i16 FPR128:$src))>;
  def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
                               (v4f32 (REV32v16i8 FPR128:$src))>;
  def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))),
diff --git a/test/CodeGen/AArch64/aarch64_f16_be.ll b/test/CodeGen/AArch64/aarch64_f16_be.ll

new file mode 100644 (file)

index 0000000..7504439
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64_f16_be.ll
@@ -0,0 +1,67 @@
+; RUN: llc -mtriple=aarch64-linux-gnuabi -O0 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64_be-linux-gnuabi -O0 < %s | FileCheck %s --check-prefix=CHECK-BE
+
+define void @test_bitcast_v8f16_to_v4f32(<8 x half> %a) {
+; CHECK-LABEL: test_bitcast_v8f16_to_v4f32:
+; CHECK-NOT: st1
+
+; CHECK-BE-LABEL: test_bitcast_v8f16_to_v4f32:
+; CHECK-BE: st1
+
+  %x = alloca <4 x float>, align 16
+  %y = bitcast <8 x half> %a to <4 x float>
+  store <4 x float> %y, <4 x float>* %x, align 16
+  ret void
+}
+
+define void @test_bitcast_v8f16_to_v2f64(<8 x half> %a) {
+; CHECK-LABEL: test_bitcast_v8f16_to_v2f64:
+; CHECK-NOT: st1
+
+; CHECK-BE-LABEL: test_bitcast_v8f16_to_v2f64:
+; CHECK-BE: st1
+
+  %x = alloca <2 x double>, align 16
+  %y = bitcast <8 x half> %a to <2 x double>
+  store <2 x double> %y, <2 x double>* %x, align 16
+  ret void
+}
+
+define void @test_bitcast_v8f16_to_fp128(<8 x half> %a) {
+; CHECK-LABEL: test_bitcast_v8f16_to_fp128:
+; CHECK-NOT: st1
+
+; CHECK-BE-LABEL: test_bitcast_v8f16_to_fp128:
+; CHECK-BE: st1
+
+  %x = alloca fp128, align 16
+  %y = bitcast <8 x half> %a to fp128
+  store fp128 %y, fp128* %x, align 16
+  ret void
+}
+
+define void @test_bitcast_v4f16_to_v2f32(<4 x half> %a) {
+; CHECK-LABEL: test_bitcast_v4f16_to_v2f32:
+; CHECK-NOT: st1
+
+; CHECK-BE-LABEL: test_bitcast_v4f16_to_v2f32:
+; CHECK-BE: st1
+
+  %x = alloca <2 x float>, align 8
+  %y = bitcast <4 x half> %a to <2 x float>
+  store <2 x float> %y, <2 x float>* %x, align 8
+  ret void
+}
+
+define void @test_bitcast_v4f16_to_v1f64(<4 x half> %a) {
+; CHECK-LABEL: test_bitcast_v4f16_to_v1f64:
+; CHECK-NOT: st1
+
+; CHECK-BE-LABEL: test_bitcast_v4f16_to_v1f64:
+; CHECK-BE: st1
+
+  %x = alloca <1 x double>, align 8
+  %y = bitcast <4 x half> %a to <1 x double>
+  store <1 x double> %y, <1 x double>* %x, align 8
+  ret void
+}
author	Jiangning Liu <jiangning.liu@arm.com>
	Fri, 29 Aug 2014 01:31:42 +0000 (01:31 +0000)
committer	Jiangning Liu <jiangning.liu@arm.com>
	Fri, 29 Aug 2014 01:31:42 +0000 (01:31 +0000)
lib/Target/AArch64/AArch64ISelLowering.cpp		patch | blob | history
lib/Target/AArch64/AArch64InstrInfo.td		patch | blob | history
test/CodeGen/AArch64/aarch64_f16_be.ll	[new file with mode: 0644]	patch | blob