Add support for AVX to materialize +0.0 when doing scalar FP.

author Nate Begeman <natebegeman@mac.com>

Thu, 9 Dec 2010 21:43:51 +0000 (21:43 +0000)

committer Nate Begeman <natebegeman@mac.com>

Thu, 9 Dec 2010 21:43:51 +0000 (21:43 +0000)
author Nate Begeman <natebegeman@mac.com>
Thu, 9 Dec 2010 21:43:51 +0000 (21:43 +0000)
committer Nate Begeman <natebegeman@mac.com>
Thu, 9 Dec 2010 21:43:51 +0000 (21:43 +0000)
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp

index 06c35b4..73654d3 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2420,9 +2420,11 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
        Alignment = 16;
        break;
      case X86::FsFLD0SD:
+    case X86::VFsFLD0SD:
        Alignment = 8;
        break;
      case X86::FsFLD0SS:
+    case X86::VFsFLD0SS:
        Alignment = 4;
        break;
      default:
@@ -2486,9 +2488,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
      MachineConstantPool &MCP = *MF.getConstantPool();
      const Type *Ty;
      unsigned Opc = LoadMI->getOpcode();
-    if (Opc == X86::FsFLD0SS)
+    if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS)
        Ty = Type::getFloatTy(MF.getFunction()->getContext());
-    else if (Opc == X86::FsFLD0SD)
+    else if (Opc == X86::FsFLD0SD || Opc == X86::VFsFLD0SD)
        Ty = Type::getDoubleTy(MF.getFunction()->getContext());
      else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
        Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td

index 73a6202..b954299 100644 (file)
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1440,6 +1440,12 @@ def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
  def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
                   [(set FR64:$dst, fpimm0)]>,
                 Requires<[HasSSE2]>, TB, OpSize;
+def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+                  [(set FR32:$dst, fp32imm0)]>,
+                  Requires<[HasAVX]>, TB, OpSize, VEX_4V;
+def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+                  [(set FR64:$dst, fpimm0)]>,
+                  Requires<[HasAVX]>, TB, OpSize, VEX_4V;
  }
  
  // Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp

index 3ca87ed..cbe6db2 100644 (file)
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -374,6 +374,8 @@ ReSimplify:
    case X86::MOV32r0:      LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
    case X86::FsFLD0SS:      LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
    case X86::FsFLD0SD:      LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+  case X86::VFsFLD0SS:     LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
+  case X86::VFsFLD0SD:     LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
    case X86::V_SET0PS:      LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
    case X86::V_SET0PD:      LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
    case X86::V_SET0PI:      LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
author	Nate Begeman <natebegeman@mac.com>
	Thu, 9 Dec 2010 21:43:51 +0000 (21:43 +0000)
committer	Nate Begeman <natebegeman@mac.com>
	Thu, 9 Dec 2010 21:43:51 +0000 (21:43 +0000)
lib/Target/X86/X86InstrInfo.cpp		patch | blob | history
lib/Target/X86/X86InstrSSE.td		patch | blob | history
lib/Target/X86/X86MCInstLower.cpp		patch | blob | history