Utilize new SDNode flag functionality to expand current support for fadd

author Michael Berg <michael_c_berg@apple.com>

Mon, 18 Jun 2018 23:44:59 +0000 (23:44 +0000)

committer Michael Berg <michael_c_berg@apple.com>

Mon, 18 Jun 2018 23:44:59 +0000 (23:44 +0000)
author Michael Berg <michael_c_berg@apple.com>
Mon, 18 Jun 2018 23:44:59 +0000 (23:44 +0000)
committer Michael Berg <michael_c_berg@apple.com>
Mon, 18 Jun 2018 23:44:59 +0000 (23:44 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index b66b2f8..b3f5531 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10327,20 +10327,21 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
      return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
    }
  
-  // FIXME: Auto-upgrade the target/function-level option.
-  if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) {
-    // fold (fadd A, 0) -> A
-    if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
-      if (N1C->isZero())
-        return N0;
+  ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1);
+  if (N1C && N1C->isZero()) {
+    if (N1C->isNegative() || Options.UnsafeFPMath ||
+        Flags.hasNoSignedZeros()) {
+      // fold (fadd A, -0.0) -> A always; (fadd A, +0.0) -> A needs nsz/unsafe
+      return N0;
+    }
    }
  
    // No FP constant should be created after legalization as Instruction
    // Selection pass has a hard time dealing with FP constants.
    bool AllowNewConst = (Level < AfterLegalizeDAG);
  
-  // TODO: fmf test for NaNs could be done here too
-  if (Options.UnsafeFPMath && AllowNewConst) {
+  // If 'unsafe math' or nnan is enabled, fold lots of things.
+  if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
      // If allowed, fold (fadd (fneg x), x) -> 0.0
      if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
        return DAG.getConstantFP(0.0, DL, VT);
@@ -10350,9 +10351,12 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
        return DAG.getConstantFP(0.0, DL, VT);
    }
  
-  // If 'unsafe math' is enabled, fold lots of things.
-  // TODO: fmf testing for reassoc/nsz could be done here too
-  if (Options.UnsafeFPMath && AllowNewConst) {
+  // With 'unsafe math', or with both reassoc and nsz, fold lots of things.
+  // TODO: break out portions of the transformations below for which Unsafe is
+  //       considered and which do not require both nsz and reassoc
+  if ((Options.UnsafeFPMath ||
+       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
+      AllowNewConst) {
      // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
      if (N1CFP && N0.getOpcode() == ISD::FADD &&
          isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

index d227c4a..4fdd3de 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4442,24 +4442,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
    case ISD::FMUL:
    case ISD::FDIV:
    case ISD::FREM:
-    if (getTarget().Options.UnsafeFPMath) {
-      if (Opcode == ISD::FADD) {
-        // x+0 --> x
-        if (N2CFP && N2CFP->getValueAPF().isZero())
-          return N1;
-      } else if (Opcode == ISD::FSUB) {
-        // x-0 --> x
-        if (N2CFP && N2CFP->getValueAPF().isZero())
-          return N1;
-      } else if (Opcode == ISD::FMUL) {
-        // x*0 --> 0
-        if (N2CFP && N2CFP->isZero())
-          return N2;
-        // x*1 --> x
-        if (N2CFP && N2CFP->isExactlyValue(1.0))
-          return N1;
-      }
-    }
      assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
      assert(N1.getValueType() == N2.getValueType() &&
             N1.getValueType() == VT && "Binary operator types must match!");
diff --git a/test/CodeGen/AArch64/fadd-combines.ll b/test/CodeGen/AArch64/fadd-combines.ll

index 3c5524f..be027a7 100644 (file)
--- a/test/CodeGen/AArch64/fadd-combines.ll
+++ b/test/CodeGen/AArch64/fadd-combines.ll
@@ -100,11 +100,12 @@ define float @fadd_const_multiuse_fmf(float %x) {
  ; CHECK-LABEL: fadd_const_multiuse_fmf:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    adrp x8, .LCPI7_0
+; CHECK-NEXT:    adrp x9, .LCPI7_1
  ; CHECK-NEXT:    ldr s1, [x8, :lo12:.LCPI7_0]
-; CHECK-NEXT:    fadd s0, s0, s1
-; CHECK-NEXT:    fmov s1, #17.00000000
+; CHECK-NEXT:    ldr s2, [x9, :lo12:.LCPI7_1]
  ; CHECK-NEXT:    fadd s1, s0, s1
-; CHECK-NEXT:    fadd s0, s0, s1
+; CHECK-NEXT:    fadd s0, s0, s2
+; CHECK-NEXT:    fadd s0, s1, s0
  ; CHECK-NEXT:    ret
    %a1 = fadd float %x, 42.0
    %a2 = fadd nsz reassoc float %a1, 17.0
diff --git a/test/CodeGen/AMDGPU/fadd.ll b/test/CodeGen/AMDGPU/fadd.ll

index a2f1f71..c08fd35 100644 (file)
--- a/test/CodeGen/AMDGPU/fadd.ll
+++ b/test/CodeGen/AMDGPU/fadd.ll
@@ -66,7 +66,7 @@ define amdgpu_kernel void @fadd_v8f32(<8 x float> addrspace(1)* %out, <8 x float
  ; FUNC-LABEL: {{^}}fadd_0_nsz_attr_f32:
  ; SI-NOT: v_add_f32
  define amdgpu_kernel void @fadd_0_nsz_attr_f32(float addrspace(1)* %out, float %a) #1 {
-   %add = fadd float %a, 0.0
+   %add = fadd nsz float %a, 0.0
     store float %add, float addrspace(1)* %out, align 4
     ret void
  }
diff --git a/test/CodeGen/X86/fmf-flags.ll b/test/CodeGen/X86/fmf-flags.ll

index d958378..4fb2040 100644 (file)
--- a/test/CodeGen/X86/fmf-flags.ll
+++ b/test/CodeGen/X86/fmf-flags.ll
@@ -32,18 +32,13 @@ declare float @llvm.fmuladd.f32(float %a, float %b, float %c);
  define float @fast_fmuladd_opts(float %a , float %b , float %c) {
  ; X64-LABEL: fast_fmuladd_opts:
  ; X64:       # %bb.0:
-; X64-NEXT:    movaps %xmm0, %xmm1
-; X64-NEXT:    addss %xmm0, %xmm1
-; X64-NEXT:    addss %xmm0, %xmm1
-; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    mulss {{.*}}(%rip), %xmm0
  ; X64-NEXT:    retq
  ;
  ; X86-LABEL: fast_fmuladd_opts:
  ; X86:       # %bb.0:
  ; X86-NEXT:    flds {{[0-9]+}}(%esp)
-; X86-NEXT:    fld %st(0)
-; X86-NEXT:    fadd %st(1)
-; X86-NEXT:    faddp %st(1)
+; X86-NEXT:    fmuls {{.*}}
  ; X86-NEXT:    retl
    %res = call fast float @llvm.fmuladd.f32(float %a, float 2.0, float %a)
    ret float %res
@@ -56,9 +51,9 @@ define float @fast_fmuladd_opts(float %a , float %b , float %c) {
  define double @not_so_fast_mul_add(double %x) {
  ; X64-LABEL: not_so_fast_mul_add:
  ; X64:       # %bb.0:
-; X64-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; X64-NEXT:    movsd {{.*}}(%rip), %xmm1
  ; X64-NEXT:    mulsd %xmm0, %xmm1
-; X64-NEXT:    addsd %xmm1, %xmm0
+; X64-NEXT:    mulsd {{.*}}(%rip), %xmm0
  ; X64-NEXT:    movsd %xmm1, {{.*}}(%rip)
  ; X64-NEXT:    retq
  ;
@@ -67,7 +62,9 @@ define double @not_so_fast_mul_add(double %x) {
  ; X86-NEXT:    fldl {{[0-9]+}}(%esp)
  ; X86-NEXT:    fld %st(0)
  ; X86-NEXT:    fmull {{\.LCPI.*}}
-; X86-NEXT:    fadd %st(0), %st(1)
+; X86-NEXT:    fxch %st(1)
+; X86-NEXT:    fmull {{\.LCPI.*}}
+; X86-NEXT:    fxch %st(1)
  ; X86-NEXT:    fstpl mul1
  ; X86-NEXT:    retl
    %m = fmul double %x, 4.2
diff --git a/test/CodeGen/X86/fp-fold.ll b/test/CodeGen/X86/fp-fold.ll

index 3435443..b8e30a4 100644 (file)
--- a/test/CodeGen/X86/fp-fold.ll
+++ b/test/CodeGen/X86/fp-fold.ll
@@ -17,18 +17,33 @@ define float @fadd_zero(float %x) {
  }
  
  define float @fadd_negzero(float %x) {
-; STRICT-LABEL: fadd_negzero:
-; STRICT:       # %bb.0:
-; STRICT-NEXT:    addss {{.*}}(%rip), %xmm0
-; STRICT-NEXT:    retq
-;
-; UNSAFE-LABEL: fadd_negzero:
-; UNSAFE:       # %bb.0:
-; UNSAFE-NEXT:    retq
+; ANY-LABEL: fadd_negzero:
+; ANY:       # %bb.0:
+; ANY-NEXT:    retq
    %r = fadd float %x, -0.0
    ret float %r
  }
  
+define float @fadd_produce_zero(float %x) {
+; ANY-LABEL: fadd_produce_zero:
+; ANY:       # %bb.0:
+; ANY-NEXT:    xorps %xmm0, %xmm0
+; ANY-NEXT:    retq
+  %neg = fsub nsz float 0.0, %x
+  %r = fadd nnan float %neg, %x
+  ret float %r
+}
+
+define float @fadd_reassociate(float %x) {
+; ANY-LABEL: fadd_reassociate:
+; ANY:       # %bb.0:
+; ANY-NEXT:    addss {{.*}}(%rip), %xmm0
+; ANY-NEXT:    retq
+  %sum = fadd float %x, 8.0
+  %r = fadd reassoc nsz float %sum, 12.0
+  ret float %r
+}
+
  define float @fadd_negzero_nsz(float %x) {
  ; ANY-LABEL: fadd_negzero_nsz:
  ; ANY:       # %bb.0:
author	Michael Berg <michael_c_berg@apple.com>
	Mon, 18 Jun 2018 23:44:59 +0000 (23:44 +0000)
committer	Michael Berg <michael_c_berg@apple.com>
	Mon, 18 Jun 2018 23:44:59 +0000 (23:44 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/SelectionDAG.cpp		patch \| blob \| history
test/CodeGen/AArch64/fadd-combines.ll		patch \| blob \| history
test/CodeGen/AMDGPU/fadd.ll		patch \| blob \| history
test/CodeGen/X86/fmf-flags.ll		patch \| blob \| history
test/CodeGen/X86/fp-fold.ll		patch \| blob \| history