Add support for legalizing UINT_TO_FP of vectors on platforms which do not support it natively

author Nadav Rotem <nadav.rotem@intel.com>

Sat, 19 Mar 2011 13:09:10 +0000 (13:09 +0000)

committer Nadav Rotem <nadav.rotem@intel.com>

Sat, 19 Mar 2011 13:09:10 +0000 (13:09 +0000)
author Nadav Rotem <nadav.rotem@intel.com>
Sat, 19 Mar 2011 13:09:10 +0000 (13:09 +0000)
committer Nadav Rotem <nadav.rotem@intel.com>
Sat, 19 Mar 2011 13:09:10 +0000 (13:09 +0000)
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

index 167dbe0..5d0f923 100644 (file)
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -58,6 +58,9 @@ class VectorLegalizer {
    SDValue UnrollVSETCC(SDValue Op);
    // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB
    // isn't legal.
+  // Implements expansion for UINT_TO_FP; falls back to UnrollVectorOp if
+  // SINT_TO_FP or SRL on vectors isn't legal.
+  SDValue ExpandUINT_TO_FLOAT(SDValue Op);
    SDValue ExpandFNEG(SDValue Op);
    // Implements vector promotion; this is essentially just bitcasting the
    // operands to a different type and bitcasting the result back to the
@@ -207,7 +210,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
      // FALL THROUGH
    }
    case TargetLowering::Expand:
-    if (Node->getOpcode() == ISD::FNEG)
+    if (Node->getOpcode() == ISD::UINT_TO_FP)
+      Result = ExpandUINT_TO_FLOAT(Op);
+    else if (Node->getOpcode() == ISD::FNEG)
        Result = ExpandFNEG(Op);
      else if (Node->getOpcode() == ISD::VSETCC)
        Result = UnrollVSETCC(Op);
@@ -251,6 +256,48 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
    return DAG.getNode(ISD::BITCAST, dl, VT, Op);
  }
  
+SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
+  // Expand vector UINT_TO_FP as SINT_TO_FP(hi) * 2^(BW/2) + SINT_TO_FP(lo).
+  // Each half occupies only the low BW/2 bits, so its sign bit is clear.
+  EVT VT = Op.getOperand(0).getValueType();
+  EVT FVT = Op.getValueType();
+  DebugLoc DL = Op.getDebugLoc();
+
+  // Make sure that the SINT_TO_FP and SRL instructions are available;
+  // otherwise unroll the operation.
+  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, VT) ||
+      !TLI.isOperationLegalOrCustom(ISD::SRL, VT))
+    return DAG.UnrollVectorOp(Op.getNode());
+
+  unsigned BW = VT.getScalarType().getSizeInBits();
+  assert((BW == 64 || BW == 32) &&
+      "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
+
+  SDValue HalfWord = DAG.getConstant(BW/2, VT);
+  // Two to the power of half-word-size. The shift must be done in 64 bits:
+  // a plain int '1' shifted by 32 (BW == 64) is undefined behavior.
+  uint64_t TwoToHW = 1ULL << (BW/2);
+
+  // Constant to clear the upper part of the word.
+  // Notice that we can also use SHL+SHR, but using a constant is slightly
+  // faster on x86.
+  SDValue HalfWordMask = DAG.getConstant(TwoToHW - 1, VT);
+  SDValue TWOHW = DAG.getConstantFP(static_cast<double>(TwoToHW), FVT);
+
+  // HI is the upper half of each element moved down; LO is the lower half.
+  SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
+  SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
+
+  // Convert both halves and scale the high half back to its magnitude.
+  SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, FVT, HI);
+          fHI = DAG.getNode(ISD::FMUL, DL, FVT, fHI, TWOHW);
+  SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, FVT, LO);
+
+  // Add the two halves
+  return DAG.getNode(ISD::FADD, DL, FVT, fHI, fLO);
+}
+
+
  SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
    if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
      SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType());
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index a4d01a1..576c879 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -927,6 +927,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
      // Can turn SHL into an integer multiply.
      setOperationAction(ISD::SHL,                MVT::v4i32, Custom);
      setOperationAction(ISD::SHL,                MVT::v16i8, Custom);
+    setOperationAction(ISD::SRL,                MVT::v4i32, Legal);
  
      // i8 and i16 vectors are custom , because the source register and source
      // source memory operand types are not the same width.  f32 vectors are
diff --git a/test/CodeGen/X86/vec_uint_to_fp.ll b/test/CodeGen/X86/vec_uint_to_fp.ll

new file mode 100644 (file)

index 0000000..39e7d71
--- /dev/null
+++ b/test/CodeGen/X86/vec_uint_to_fp.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86 -mcpu=sandybridge | FileCheck %s
+
+; Test that a vector uitofp is not lowered to scalar conversions.
+define <4 x float> @test1(<4 x i32> %A) nounwind {
+; CHECK: test1:
+; CHECK-NOT: cvtsd2ss
+; CHECK: ret
+  %C = uitofp <4 x i32> %A to <4 x float>
+  ret <4 x float> %C
+}
+
author	Nadav Rotem <nadav.rotem@intel.com>
	Sat, 19 Mar 2011 13:09:10 +0000 (13:09 +0000)
committer	Nadav Rotem <nadav.rotem@intel.com>
	Sat, 19 Mar 2011 13:09:10 +0000 (13:09 +0000)
lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp		patch \| blob \| history
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/X86/vec_uint_to_fp.ll	[new file with mode: 0644]	patch \| blob