return DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
}
-/// \brief Try to lower a vector shuffle as a byte rotation.
-///
-/// SSSE3 has a generic PALIGNR instruction in x86 that will do an arbitrary
-/// byte-rotation of the concatenation of two vectors; pre-SSSE3 can use
-/// a PSRLDQ/PSLLDQ/POR pattern to get a similar effect. This routine will
-/// try to generically lower a vector shuffle through such an pattern. It
-/// does not check for the profitability of lowering either as PALIGNR or
-/// PSRLDQ/PSLLDQ/POR, only whether the mask is valid to lower in that form.
-/// This matches shuffle vectors that look like:
-///
-/// v8i16 [11, 12, 13, 14, 15, 0, 1, 2]
+/// \brief Try to lower a vector shuffle as a rotation.
///
-/// Essentially it concatenates V1 and V2, shifts right by some number of
-/// elements, and takes the low elements as the result. Note that while this is
-/// specified as a *right shift* because x86 is little-endian, it is a *left
-/// rotate* of the vector lanes.
-static int matchVectorShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
- ArrayRef<int> Mask) {
- // Don't accept any shuffles with zero elements.
- if (any_of(Mask, [](int M) { return M == SM_SentinelZero; }))
- return -1;
-
- // PALIGNR works on 128-bit lanes.
- SmallVector<int, 16> RepeatedMask;
- if (!is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask))
- return -1;
-
- int NumElts = RepeatedMask.size();
+/// This is used for support PALIGNR for SSSE3 or VALIGND/Q for AVX512.
+static int matchVectorShuffleAsRotate(SDValue &V1, SDValue &V2,
+ ArrayRef<int> Mask) {
+ int NumElts = Mask.size();
// We need to detect various ways of spelling a rotation:
// [11, 12, 13, 14, 15, 0, 1, 2]
int Rotation = 0;
SDValue Lo, Hi;
for (int i = 0; i < NumElts; ++i) {
- int M = RepeatedMask[i];
+ int M = Mask[i];
assert((M == SM_SentinelUndef || (0 <= M && M < (2*NumElts))) &&
"Unexpected mask index.");
if (M < 0)
V1 = Lo;
V2 = Hi;
+ return Rotation;
+}
+
+/// \brief Try to lower a vector shuffle as a byte rotation.
+///
+/// SSSE3 has a generic PALIGNR instruction in x86 that will do an arbitrary
+/// byte-rotation of the concatenation of two vectors; pre-SSSE3 can use
+/// a PSRLDQ/PSLLDQ/POR pattern to get a similar effect. This routine will
+/// try to generically lower a vector shuffle through such an pattern. It
+/// does not check for the profitability of lowering either as PALIGNR or
+/// PSRLDQ/PSLLDQ/POR, only whether the mask is valid to lower in that form.
+/// This matches shuffle vectors that look like:
+///
+/// v8i16 [11, 12, 13, 14, 15, 0, 1, 2]
+///
+/// Essentially it concatenates V1 and V2, shifts right by some number of
+/// elements, and takes the low elements as the result. Note that while this is
+/// specified as a *right shift* because x86 is little-endian, it is a *left
+/// rotate* of the vector lanes.
+static int matchVectorShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
+ ArrayRef<int> Mask) {
+ // Don't accept any shuffles with zero elements.
+ if (any_of(Mask, [](int M) { return M == SM_SentinelZero; }))
+ return -1;
+
+ // PALIGNR works on 128-bit lanes.
+ SmallVector<int, 16> RepeatedMask;
+ if (!is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask))
+ return -1;
+
+ int Rotation = matchVectorShuffleAsRotate(V1, V2, RepeatedMask);
+ if (Rotation <= 0)
+ return -1;
+
// PALIGNR rotates bytes, so we need to scale the
// rotation based on how many bytes are in the vector lane.
+ int NumElts = RepeatedMask.size();
int Scale = 16 / NumElts;
return Rotation * Scale;
}
DAG.getNode(ISD::OR, DL, MVT::v16i8, LoShift, HiShift));
}
+/// \brief Try to lower a vector shuffle as a dword/qword rotation.
+///
+/// AVX512 has a VALIGND/VALIGNQ instructions that will do an arbitrary
+/// rotation of the concatenation of two vectors; This routine will
+/// try to generically lower a vector shuffle through such an pattern.
+///
+/// Essentially it concatenates V1 and V2, shifts right by some number of
+/// elements, and takes the low elements as the result. Note that while this is
+/// specified as a *right shift* because x86 is little-endian, it is a *left
+/// rotate* of the vector lanes.
+static SDValue lowerVectorShuffleAsRotate(const SDLoc &DL, MVT VT,
+ SDValue V1, SDValue V2,
+ ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) &&
+ "Only 32-bit and 64-bit elements are supported!");
+
+ // 128/256-bit vectors are only supported with VLX.
+ assert((Subtarget.hasVLX() || (!VT.is128BitVector() && !VT.is256BitVector()))
+ && "VLX required for 128/256-bit vectors");
+
+ SDValue Lo = V1, Hi = V2;
+ int Rotation = matchVectorShuffleAsRotate(Lo, Hi, Mask);
+ if (Rotation <= 0)
+ return SDValue();
+
+ return DAG.getNode(X86ISD::VALIGN, DL, VT, Lo, Hi,
+ DAG.getConstant(Rotation, DL, MVT::i8));
+}
+
/// \brief Try to lower a vector shuffle as a bit shift (shifts in zeros).
///
/// Attempts to match a shuffle mask against the PSLL(W/D/Q/DQ) and
Zeroable, Subtarget, DAG))
return Shift;
+ // If we have VLX support, we can use VALIGN.
+ if (Subtarget.hasVLX())
+ if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v4i64, V1, V2,
+ Mask, Subtarget, DAG))
+ return Rotate;
+
+ // Try to use PALIGNR.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v4i64, V1, V2,
Mask, Subtarget, DAG))
return Rotate;
Zeroable, Subtarget, DAG))
return Shift;
+ // If we have VLX support, we can use VALIGN.
+ if (Subtarget.hasVLX())
+ if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v8i32, V1, V2,
+ Mask, Subtarget, DAG))
+ return Rotate;
+
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
Zeroable, Subtarget, DAG))
return Shift;
+ // Try to use VALIGN.
+ if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v8i64, V1, V2,
+ Mask, Subtarget, DAG))
+ return Rotate;
+
+ // Try to use PALIGNR.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i64, V1, V2,
Mask, Subtarget, DAG))
return Rotate;
Zeroable, Subtarget, DAG))
return Shift;
+ // Try to use VALIGN.
+ if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v16i32, V1, V2,
+ Mask, Subtarget, DAG))
+ return Rotate;
+
// Try to use byte rotation instructions.
if (Subtarget.hasBWI())
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(