Remove the rest of the *_sfp Neon instruction patterns.

author Bob Wilson <bob.wilson@apple.com>

Mon, 13 Dec 2010 23:02:37 +0000 (23:02 +0000)

committer Bob Wilson <bob.wilson@apple.com>

Mon, 13 Dec 2010 23:02:37 +0000 (23:02 +0000)
author Bob Wilson <bob.wilson@apple.com>
Mon, 13 Dec 2010 23:02:37 +0000 (23:02 +0000)
committer Bob Wilson <bob.wilson@apple.com>
Mon, 13 Dec 2010 23:02:37 +0000 (23:02 +0000)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp

index c82ae7e..35c4b0d 100644 (file)
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -58,8 +58,6 @@ static const ARM_MLxEntry ARM_MLxTable[] = {
    { ARM::VMLSS,       ARM::VMULS,       ARM::VSUBS,      false,  false },
    { ARM::VMLAD,       ARM::VMULD,       ARM::VADDD,      false,  false },
    { ARM::VMLSD,       ARM::VMULD,       ARM::VSUBD,      false,  false },
-  { ARM::VMLAfd_sfp,  ARM::VMULfd_sfp,  ARM::VADDfd_sfp, false,  false },
-  { ARM::VMLSfd_sfp,  ARM::VMULfd_sfp,  ARM::VSUBfd_sfp, false,  false },
    { ARM::VNMLAS,      ARM::VNMULS,      ARM::VSUBS,      true,   false },
    { ARM::VNMLSS,      ARM::VMULS,       ARM::VSUBS,      true,   false },
    { ARM::VNMLAD,      ARM::VNMULD,      ARM::VSUBD,      true,   false },
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td

index c4b590c..b95e02f 100644 (file)
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1667,7 +1667,7 @@ def SubReg_i32_lane : SDNodeXForm<imm, [{
  // Instruction Classes
  //===----------------------------------------------------------------------===//
  
-// Basic 2-register operations: single-, double- and quad-register.
+// Basic 2-register operations: double- and quad-register.
  class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
             bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
             string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
@@ -1736,13 +1736,7 @@ class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
          (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
          "$src1 = $Vd, $src2 = $Vm", []>;
  
-// Basic 3-register operations: single-, double- and quad-register.
-class N3VS<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-           string OpcodeStr, string Dt>
-  : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs DPR_VFP2:$Vd), (ins DPR_VFP2:$Vn, DPR_VFP2:$Vm), N3RegFrm,
-        IIC_VBIND, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", []>;
-
+// Basic 3-register operations: double- and quad-register.
  class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
@@ -1912,13 +1906,7 @@ class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
    let isCommutable = 0;
  }
  
-// Multiply-Add/Sub operations: single-, double- and quad-register.
-class N3VSMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-                InstrItinClass itin, string OpcodeStr, string Dt>
-  : N3V<op24, op23, op21_20, op11_8, 0, op4, (outs DPR_VFP2:$Vd),
-        (ins DPR_VFP2:$src1, DPR_VFP2:$Vn, DPR_VFP2:$Vm), N3RegFrm, itin,
-        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", []>;
-
+// Multiply-Add/Sub operations: double- and quad-register.
  class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
@@ -4678,83 +4666,47 @@ def  VTBX4Pseudo
  class N2VSPat<SDNode OpNode, NeonI Inst>
    : NEONFPPat<(f32 (OpNode SPR:$a)),
                (EXTRACT_SUBREG
-               (v2f32 (COPY_TO_REGCLASS
-                (Inst (INSERT_SUBREG
+               (v2f32 (COPY_TO_REGCLASS (Inst
+                (INSERT_SUBREG
                   (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                   SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
  
  class N3VSPat<SDNode OpNode, NeonI Inst>
    : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
-              (EXTRACT_SUBREG (v2f32
-                                 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
-                                                      SPR:$a, ssub_0),
-                                       (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
-                                                      SPR:$b, ssub_0))),
-                              ssub_0)>;
+              (EXTRACT_SUBREG
+               (v2f32 (COPY_TO_REGCLASS (Inst
+                (INSERT_SUBREG
+                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+                 SPR:$a, ssub_0),
+                (INSERT_SUBREG
+                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
  
  class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
    : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
-              (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
-                                                   SPR:$acc, ssub_0),
-                                    (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
-                                                   SPR:$a, ssub_0),
-                                    (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
-                                                   SPR:$b, ssub_0)),
-                              ssub_0)>;
-
-// These need separate instructions because they must use DPR_VFP2 register
-// class which have SPR sub-registers.
-
-// Vector Add Operations used for single-precision FP
-let neverHasSideEffects = 1 in
-def VADDfd_sfp : N3VS<0,0,0b00,0b1101,0, "vadd", "f32">;
-def : N3VSPat<fadd, VADDfd_sfp>;
-
-// Vector Sub Operations used for single-precision FP
-let neverHasSideEffects = 1 in
-def VSUBfd_sfp : N3VS<0,0,0b10,0b1101,0, "vsub", "f32">;
-def : N3VSPat<fsub, VSUBfd_sfp>;
-
-// Vector Multiply Operations used for single-precision FP
-let neverHasSideEffects = 1 in
-def VMULfd_sfp : N3VS<1,0,0b00,0b1101,1, "vmul", "f32">;
-def : N3VSPat<fmul, VMULfd_sfp>;
-
-// Vector Multiply-Accumulate/Subtract used for single-precision FP
-// vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions, so
-// we want to avoid them for now. e.g., alternating vmla/vadd instructions.
-
-let neverHasSideEffects = 1 in
-def VMLAfd_sfp : N3VSMulOp<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32">;
-def : N3VSMulOpPat<fmul, fadd, VMLAfd_sfp>,
-      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+              (EXTRACT_SUBREG
+               (v2f32 (COPY_TO_REGCLASS (Inst
+                (INSERT_SUBREG
+                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+                 SPR:$acc, ssub_0),
+                (INSERT_SUBREG
+                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+                 SPR:$a, ssub_0),
+                (INSERT_SUBREG
+                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
  
-let neverHasSideEffects = 1 in
-def VMLSfd_sfp : N3VSMulOp<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32">;
-def : N3VSMulOpPat<fmul, fsub, VMLSfd_sfp>,
+def : N3VSPat<fadd, VADDfd>;
+def : N3VSPat<fsub, VSUBfd>;
+def : N3VSPat<fmul, VMULfd>;
+def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
+      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
        Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
-
-// Vector Absolute used for single-precision FP
  def : N2VSPat<fabs, VABSfd>;
-
-// Vector Negate used for single-precision FP
  def : N2VSPat<fneg, VNEGfd>;
-
-// Vector Maximum used for single-precision FP
-let neverHasSideEffects = 1 in
-def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$Vd),
-                     (ins DPR_VFP2:$Vn, DPR_VFP2:$Vm), N3RegFrm, IIC_VBIND,
-                     "vmax", "f32", "$Vd, $Vn, $Vm", "", []>;
-def : N3VSPat<NEONfmax, VMAXfd_sfp>;
-
-// Vector Minimum used for single-precision FP
-let neverHasSideEffects = 1 in
-def VMINfd_sfp : N3V<0, 0, 0b10, 0b1111, 0, 0, (outs DPR_VFP2:$Vd),
-                     (ins DPR_VFP2:$Vn, DPR_VFP2:$Vm), N3RegFrm, IIC_VBIND,
-                     "vmin", "f32", "$Vd, $Vn, $Vm", "", []>;
-def : N3VSPat<NEONfmin, VMINfd_sfp>;
-
-// Vector Convert between single-precision FP and integer
+def : N3VSPat<NEONfmax, VMAXfd>;
+def : N3VSPat<NEONfmin, VMINfd>;
  def : N2VSPat<arm_ftosi, VCVTf2sd>;
  def : N2VSPat<arm_ftoui, VCVTf2ud>;
  def : N2VSPat<arm_sitof, VCVTs2fd>;
diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll

index c169fb3..b8c8cb1 100644 (file)
--- a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
+++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
@@ -15,9 +15,6 @@ bb.nph:                                           ; preds = %bb5
  
  ; Loop preheader
  ; CHECK: vmov.f32
-; CHECK: vsub.f32
-; CHECK: vadd.f32
-; CHECK: vmul.f32
  bb7:                                              ; preds = %bb9, %bb.nph
    %s1.02 = phi float [ undef, %bb.nph ], [ %35, %bb9 ] ; <float> [#uses=3]
    %tmp79 = add i32 undef, undef                   ; <i32> [#uses=1]
@@ -73,8 +70,6 @@ bb8:                                              ; preds = %bb8, %bb7
    br i1 %34, label %bb8, label %bb9
  
  bb9:                                              ; preds = %bb8
-; CHECK: %bb9
-; CHECK: vmov.f32
    %35 = fadd float 0.000000e+00, undef            ; <float> [#uses=1]
    br label %bb7
  }
diff --git a/utils/TableGen/ARMDecoderEmitter.cpp b/utils/TableGen/ARMDecoderEmitter.cpp

index c4e78f3..10e507f 100644 (file)
--- a/utils/TableGen/ARMDecoderEmitter.cpp
+++ b/utils/TableGen/ARMDecoderEmitter.cpp
@@ -1613,11 +1613,6 @@ ARMDEBackend::populateInstruction(const CodeGenInstruction &CGI,
          Name == "VNEGScc")
        return false;
  
-    // Ignore the *_sfp instructions when decoding.  They are used by the
-    // compiler to implement scalar floating point operations using vector
-    // operations in order to work around some performance issues.
-    if (Name.find("_sfp") != std::string::npos) return false;
-
      // LDMIA_RET is a special case of LDM (Load Multiple) where the registers
      // loaded include the PC, causing a branch to a loaded address.  Ignore
      // the LDMIA_RET instruction when decoding.
author	Bob Wilson <bob.wilson@apple.com>
	Mon, 13 Dec 2010 23:02:37 +0000 (23:02 +0000)
committer	Bob Wilson <bob.wilson@apple.com>
	Mon, 13 Dec 2010 23:02:37 +0000 (23:02 +0000)
lib/Target/ARM/ARMBaseInstrInfo.cpp		patch \| blob \| history
lib/Target/ARM/ARMInstrNEON.td		patch \| blob \| history
test/CodeGen/Thumb2/cross-rc-coalescing-2.ll		patch \| blob \| history
utils/TableGen/ARMDecoderEmitter.cpp		patch \| blob \| history