Don't combine fp_round (fp_round x) if f80 to f16 is generated

author Pirama Arumuga Nainar <pirama@google.com>

Sat, 13 Feb 2016 00:08:05 +0000 (00:08 +0000)

committer Pirama Arumuga Nainar <pirama@google.com>

Sat, 13 Feb 2016 00:08:05 +0000 (00:08 +0000)
author Pirama Arumuga Nainar <pirama@google.com>
Sat, 13 Feb 2016 00:08:05 +0000 (00:08 +0000)
committer Pirama Arumuga Nainar <pirama@google.com>
Sat, 13 Feb 2016 00:08:05 +0000 (00:08 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 3fca3b4..8b1b6bb 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9019,6 +9019,17 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
    if (N0.getOpcode() == ISD::FP_ROUND) {
      const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
      const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1;
+
+    // Skip this folding if it results in an fp_round from f80 to f16.
+    //
    // f80 to f16 always generates an expensive (and as yet unimplemented)
+    // libcall to __truncxfhf2 instead of selecting native f16 conversion
+    // instructions from f32 or f64.  Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP on platforms like
+    // x86.
+    if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
+      return SDValue();
+
      // If the first fp_round isn't a value preserving truncation, it might
      // introduce a tie in the second fp_round, that wouldn't occur in the
      // single-step fp_round we want to fold to.
diff --git a/test/CodeGen/X86/half.ll b/test/CodeGen/X86/half.ll

index 531891f..43e6aa8 100644 (file)
--- a/test/CodeGen/X86/half.ll
+++ b/test/CodeGen/X86/half.ll
@@ -2,6 +2,8 @@
  ; RUN:   | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LIBCALL
  ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c -asm-verbose=false \
  ; RUN:    | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-F16C
+; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -asm-verbose=false \
+; RUN:    | FileCheck %s -check-prefix=CHECK-I686
  
  define void @test_load_store(half* %in, half* %out) {
  ; CHECK-LABEL: test_load_store:
@@ -260,4 +262,17 @@ define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) {
    ret void
  }
  
+declare float @test_floatret();
+
+; On i686, if SSE2 is available, the return value from test_floatret is loaded
+; to f80 and then rounded to f32.  The DAG combiner should not combine this
+; fp_round and the subsequent fptrunc from float to half.
+define half @test_f80trunc_nodagcombine() #0 {
+; CHECK-LABEL: test_f80trunc_nodagcombine:
+; CHECK-I686-NOT: calll __truncxfhf2
+  %1 = call float @test_floatret()
+  %2 = fptrunc float %1 to half
+  ret half %2
+}
+
  attributes #0 = { nounwind }
author	Pirama Arumuga Nainar <pirama@google.com>
	Sat, 13 Feb 2016 00:08:05 +0000 (00:08 +0000)
committer	Pirama Arumuga Nainar <pirama@google.com>
	Sat, 13 Feb 2016 00:08:05 +0000 (00:08 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
test/CodeGen/X86/half.ll		patch \| blob \| history