//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
+// MMX specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+// Low word of MMX to GPR: takes one x86mmx operand and yields one i32 result.
+def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
+ [SDTCisVT<0, i32>, SDTCisVT<1, x86mmx>]>>;
+// GPR to low word of MMX: takes one i32 operand and yields one x86mmx result.
+def MMX_X86movw2d : SDNode<"X86ISD::MMX_MOVW2D", SDTypeProfile<1, 1,
+ [SDTCisVT<0, x86mmx>, SDTCisVT<1, i32>]>>;
+
+//===----------------------------------------------------------------------===//
// MMX Pattern Fragments
//===----------------------------------------------------------------------===//
def load_mmx : PatFrag<(ops node:$ptr), (x86mmx (load node:$ptr))>;
+def load_mvmmx : PatFrag<(ops node:$ptr),
+ (x86mmx (MMX_X86movw2d (load node:$ptr)))>; // x86mmx value produced by MMX_X86movw2d from a load of $ptr
def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>;
//===----------------------------------------------------------------------===//
def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>;
+def SDTFPBinOpRound : SDTypeProfile<1, 3, [ // fadd_round, fmul_round, etc.: FP result, two sources of the same type, integer operand 3.
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisInt<3>]>;
+
def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
+def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
+ SDTCisSameAs<1,2>, SDTCisSameAs<1,3>, SDTCisInt<4>]>; // same type constraints as SDTFma, plus integer operand 4
def STDFp1SrcRm : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>,
SDTCisVec<0>, SDTCisInt<2>]>;
+def STDFp2SrcRm : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
+ SDTCisVec<0>, SDTCisInt<3>]>; // vector result typed like operand 1; operand 2 is unconstrained; operand 3 is an integer
+def STDFp3SrcRm : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
+ SDTCisVec<0>, SDTCisInt<3>, SDTCisInt<4>]>; // as STDFp2SrcRm, with operands 3 and 4 both integers
def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
def X86Addsub : SDNode<"X86ISD::ADDSUB", SDTFPBinOp>;
+def X86faddRnd : SDNode<"X86ISD::FADD_RND", SDTFPBinOpRound>; // FP binary nodes using the SDTFPBinOpRound profile (extra integer operand)
+def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>;
+def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>;
+def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>;
+
def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>;
def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFma>;
def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFma>;
+def X86FmaddRnd : SDNode<"X86ISD::FMADD_RND", SDTFmaRound>; // FMA-family nodes using the SDTFmaRound profile (extra integer operand)
+def X86FnmaddRnd : SDNode<"X86ISD::FNMADD_RND", SDTFmaRound>;
+def X86FmsubRnd : SDNode<"X86ISD::FMSUB_RND", SDTFmaRound>;
+def X86FnmsubRnd : SDNode<"X86ISD::FNMSUB_RND", SDTFmaRound>;
+def X86FmaddsubRnd : SDNode<"X86ISD::FMADDSUB_RND", SDTFmaRound>;
+def X86FmsubaddRnd : SDNode<"X86ISD::FMSUBADD_RND", SDTFmaRound>;
+
def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", STDFp1SrcRm>;
def X86rcp28 : SDNode<"X86ISD::RCP28", STDFp1SrcRm>;
-def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>;
+def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>;
+
+def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", STDFp2SrcRm>; // same opcode string as X86rsqrt28, but with the three-operand STDFp2SrcRm profile
+def X86rcp28s : SDNode<"X86ISD::RCP28", STDFp2SrcRm>; // same opcode string as X86rcp28, but with the three-operand STDFp2SrcRm profile
+def X86RndScale : SDNode<"X86ISD::RNDSCALE", STDFp3SrcRm>;
+def X86mgather : SDNode<"X86ISD::GATHER", SDTypeProfile<1, 3,
+ [SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>]>>; // result and operands 1-2 must be vectors; operand 3 is unconstrained
def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,
def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>;
def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>;
+def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
+ SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>; // operand 1 is an integer vector; operands 2 and 3 share the result type
+def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 3>,
+ SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>; // operand 1 is an integer vector; operand 3 shares the result type; operand 2 is unconstrained
+
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
//===----------------------------------------------------------------------===//
def extloadv4f32 : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>;
def extloadv8f32 : PatFrag<(ops node:$ptr), (v8f64 (extloadvf32 node:$ptr))>;
+// These are needed to match a scalar load that is used in a vector-only
+// math instruction such as the FP logical ops: andps, andnps, orps, xorps.
+// The memory operand is required to be a 128-bit load, so it must be converted
+// from a vector to a scalar; the fragments do this by extracting element 0.
+def loadf32_128 : PatFrag<(ops node:$ptr),
+ (f32 (vector_extract (loadv4f32 node:$ptr), (iPTR 0)))>;
+def loadf64_128 : PatFrag<(ops node:$ptr),
+ (f64 (vector_extract (loadv2f64 node:$ptr), (iPTR 0)))>;
+
// Like 'store', but always requires 128-bit vector alignment.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
// setting a feature bit in the processor (on startup, for example).
// Opteron 10h and later implement such a feature.
def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return Subtarget->hasVectorUAMem()
+ return Subtarget->hasSSEUnalignedMem() // when this subtarget query is true the load is accepted at any alignment
|| cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;
-def memop4 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return Subtarget->hasVectorUAMem()
- || cast<LoadSDNode>(N)->getAlignment() >= 4;
-}]>;
-
-def memop8 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return Subtarget->hasVectorUAMem()
- || cast<LoadSDNode>(N)->getAlignment() >= 8;
-}]>;
-
def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
-// 256-bit memop pattern fragments
-// NOTE: all 256-bit integer vector loads are promoted to v4i64
-def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
-def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
-def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>;
+// These are needed to match a scalar memop that is used in a vector-only
+// math instruction such as the FP logical ops: andps, andnps, orps, xorps.
+// The memory operand is required to be a 128-bit load, so it must be converted
+// from a vector to a scalar; the fragments do this by extracting element 0.
+def memopfsf32_128 : PatFrag<(ops node:$ptr),
+ (f32 (vector_extract (memopv4f32 node:$ptr), (iPTR 0)))>;
+def memopfsf64_128 : PatFrag<(ops node:$ptr),
+ (f64 (vector_extract (memopv2f64 node:$ptr), (iPTR 0)))>;
-// 512-bit memop pattern fragments
-def memopv16f32 : PatFrag<(ops node:$ptr), (v16f32 (memop4 node:$ptr))>;
-def memopv8f64 : PatFrag<(ops node:$ptr), (v8f64 (memop8 node:$ptr))>;
-def memopv16i32 : PatFrag<(ops node:$ptr), (v16i32 (memop4 node:$ptr))>;
-def memopv8i64 : PatFrag<(ops node:$ptr), (v8i64 (memop8 node:$ptr))>;
// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
// 16-byte boundary.
return false;
}]>;
+def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_gather node:$src1, node:$src2, node:$src3) , [{
+ //if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
+ // return (Mgt->getIndex().getValueType() == MVT::v8i32 ||
+ // Mgt->getBasePtr().getValueType() == MVT::v8i32);
+ //return false;
+ return N != 0; // NOTE(review): only a non-null check; the v8i32 index/base-pointer test above is disabled
+}]>;
+
+def mgatherv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_gather node:$src1, node:$src2, node:$src3) , [{
+ //if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
+ // return (Mgt->getIndex().getValueType() == MVT::v8i64 ||
+ // Mgt->getBasePtr().getValueType() == MVT::v8i64);
+ //return false;
+ return N != 0; // NOTE(review): only a non-null check; the v8i64 index/base-pointer test above is disabled
+}]>;
+def mgatherv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_gather node:$src1, node:$src2, node:$src3) , [{
+ //if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
+ // return (Mgt->getIndex().getValueType() == MVT::v16i32 ||
+ // Mgt->getBasePtr().getValueType() == MVT::v16i32);
+ //return false;
+ return N != 0; // NOTE(review): only a non-null check; the v16i32 index/base-pointer test above is disabled
+}]>;
+
+def mscatterv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_scatter node:$src1, node:$src2, node:$src3) , [{
+ //if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
+ // return (Sc->getIndex().getValueType() == MVT::v8i32 ||
+ // Sc->getBasePtr().getValueType() == MVT::v8i32);
+ //return false;
+ return N != 0; // NOTE(review): only a non-null check; the v8i32 index/base-pointer test above is disabled
+}]>;
+
+def mscatterv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_scatter node:$src1, node:$src2, node:$src3) , [{
+ //if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
+ // return (Sc->getIndex().getValueType() == MVT::v8i64 ||
+ // Sc->getBasePtr().getValueType() == MVT::v8i64);
+ //return false;
+ return N != 0; // NOTE(review): only a non-null check; the v8i64 index/base-pointer test above is disabled
+}]>;
+def mscatterv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_scatter node:$src1, node:$src2, node:$src3) , [{
+ //if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
+ // return (Sc->getIndex().getValueType() == MVT::v16i32 ||
+ // Sc->getBasePtr().getValueType() == MVT::v16i32);
+ //return false;
+ return N != 0; // NOTE(review): only a non-null check; the v16i32 index/base-pointer test above is disabled
+}]>;
+
// 128-bit bitconvert pattern fragments
def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;