[AVX-512] Add support for lowering shuffles to VALIGND/VALIGNQ

[android-x86/external-llvm.git] / lib / Target / X86 / X86ISelLowering.cpp
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 5f4a29d..cd1ce17 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7788,34 +7788,12 @@ static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(const SDLoc &DL,
    return DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
  }
  
-/// \brief Try to lower a vector shuffle as a byte rotation.
-///
-/// SSSE3 has a generic PALIGNR instruction in x86 that will do an arbitrary
-/// byte-rotation of the concatenation of two vectors; pre-SSSE3 can use
-/// a PSRLDQ/PSLLDQ/POR pattern to get a similar effect. This routine will
-/// try to generically lower a vector shuffle through such an pattern. It
-/// does not check for the profitability of lowering either as PALIGNR or
-/// PSRLDQ/PSLLDQ/POR, only whether the mask is valid to lower in that form.
-/// This matches shuffle vectors that look like:
-///
-///   v8i16 [11, 12, 13, 14, 15, 0, 1, 2]
+/// \brief Try to lower a vector shuffle as a rotation.
  ///
-/// Essentially it concatenates V1 and V2, shifts right by some number of
-/// elements, and takes the low elements as the result. Note that while this is
-/// specified as a *right shift* because x86 is little-endian, it is a *left
-/// rotate* of the vector lanes.
-static int matchVectorShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
-                                          ArrayRef<int> Mask) {
-  // Don't accept any shuffles with zero elements.
-  if (any_of(Mask, [](int M) { return M == SM_SentinelZero; }))
-    return -1;
-
-  // PALIGNR works on 128-bit lanes.
-  SmallVector<int, 16> RepeatedMask;
-  if (!is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask))
-    return -1;
-
-  int NumElts = RepeatedMask.size();
+/// This is used for support PALIGNR for SSSE3 or VALIGND/Q for AVX512.
+static int matchVectorShuffleAsRotate(SDValue &V1, SDValue &V2,
+                                      ArrayRef<int> Mask) {
+  int NumElts = Mask.size();
  
    // We need to detect various ways of spelling a rotation:
    //   [11, 12, 13, 14, 15,  0,  1,  2]
@@ -7827,7 +7805,7 @@ static int matchVectorShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
    int Rotation = 0;
    SDValue Lo, Hi;
    for (int i = 0; i < NumElts; ++i) {
-    int M = RepeatedMask[i];
+    int M = Mask[i];
      assert((M == SM_SentinelUndef || (0 <= M && M < (2*NumElts))) &&
             "Unexpected mask index.");
      if (M < 0)
@@ -7879,8 +7857,43 @@ static int matchVectorShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
    V1 = Lo;
    V2 = Hi;
  
+  return Rotation;
+}
+
+/// \brief Try to lower a vector shuffle as a byte rotation.
+///
+/// SSSE3 has a generic PALIGNR instruction in x86 that will do an arbitrary
+/// byte-rotation of the concatenation of two vectors; pre-SSSE3 can use
+/// a PSRLDQ/PSLLDQ/POR pattern to get a similar effect. This routine will
+/// try to generically lower a vector shuffle through such an pattern. It
+/// does not check for the profitability of lowering either as PALIGNR or
+/// PSRLDQ/PSLLDQ/POR, only whether the mask is valid to lower in that form.
+/// This matches shuffle vectors that look like:
+///
+///   v8i16 [11, 12, 13, 14, 15, 0, 1, 2]
+///
+/// Essentially it concatenates V1 and V2, shifts right by some number of
+/// elements, and takes the low elements as the result. Note that while this is
+/// specified as a *right shift* because x86 is little-endian, it is a *left
+/// rotate* of the vector lanes.
+static int matchVectorShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
+                                          ArrayRef<int> Mask) {
+  // Don't accept any shuffles with zero elements.
+  if (any_of(Mask, [](int M) { return M == SM_SentinelZero; }))
+    return -1;
+
+  // PALIGNR works on 128-bit lanes.
+  SmallVector<int, 16> RepeatedMask;
+  if (!is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask))
+    return -1;
+
+  int Rotation = matchVectorShuffleAsRotate(V1, V2, RepeatedMask);
+  if (Rotation <= 0)
+    return -1;
+
    // PALIGNR rotates bytes, so we need to scale the
    // rotation based on how many bytes are in the vector lane.
+  int NumElts = RepeatedMask.size();
    int Scale = 16 / NumElts;
    return Rotation * Scale;
  }
@@ -7931,6 +7944,37 @@ static SDValue lowerVectorShuffleAsByteRotate(const SDLoc &DL, MVT VT,
                          DAG.getNode(ISD::OR, DL, MVT::v16i8, LoShift, HiShift));
  }
  
+/// \brief Try to lower a vector shuffle as a dword/qword rotation.
+///
+/// AVX512 has a VALIGND/VALIGNQ instructions that will do an arbitrary
+/// rotation of the concatenation of two vectors; This routine will
+/// try to generically lower a vector shuffle through such an pattern.
+///
+/// Essentially it concatenates V1 and V2, shifts right by some number of
+/// elements, and takes the low elements as the result. Note that while this is
+/// specified as a *right shift* because x86 is little-endian, it is a *left
+/// rotate* of the vector lanes.
+static SDValue lowerVectorShuffleAsRotate(const SDLoc &DL, MVT VT,
+                                          SDValue V1, SDValue V2,
+                                          ArrayRef<int> Mask,
+                                          const X86Subtarget &Subtarget,
+                                          SelectionDAG &DAG) {
+  assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) &&
+         "Only 32-bit and 64-bit elements are supported!");
+
+  // 128/256-bit vectors are only supported with VLX.
+  assert((Subtarget.hasVLX() || (!VT.is128BitVector() && !VT.is256BitVector()))
+         && "VLX required for 128/256-bit vectors");
+
+  SDValue Lo = V1, Hi = V2;
+  int Rotation = matchVectorShuffleAsRotate(Lo, Hi, Mask);
+  if (Rotation <= 0)
+    return SDValue();
+
+  return DAG.getNode(X86ISD::VALIGN, DL, VT, Lo, Hi,
+                     DAG.getConstant(Rotation, DL, MVT::i8));
+}
+
  /// \brief Try to lower a vector shuffle as a bit shift (shifts in zeros).
  ///
  /// Attempts to match a shuffle mask against the PSLL(W/D/Q/DQ) and
@@ -11505,6 +11549,13 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                                  Zeroable, Subtarget, DAG))
      return Shift;
  
+  // If we have VLX support, we can use VALIGN.
+  if (Subtarget.hasVLX())
+    if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v4i64, V1, V2,
+                                                    Mask, Subtarget, DAG))
+      return Rotate;
+
+  // Try to use PALIGNR.
    if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v4i64, V1, V2,
                                                        Mask, Subtarget, DAG))
      return Rotate;
@@ -11666,6 +11717,12 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                                  Zeroable, Subtarget, DAG))
      return Shift;
  
+  // If we have VLX support, we can use VALIGN.
+  if (Subtarget.hasVLX())
+    if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v8i32, V1, V2,
+                                                    Mask, Subtarget, DAG))
+      return Rotate;
+
    // Try to use byte rotation instructions.
    if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
            DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
@@ -12094,6 +12151,12 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                                  Zeroable, Subtarget, DAG))
      return Shift;
  
+  // Try to use VALIGN.
+  if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v8i64, V1, V2,
+                                                  Mask, Subtarget, DAG))
+    return Rotate;
+
+  // Try to use PALIGNR.
    if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i64, V1, V2,
                                                        Mask, Subtarget, DAG))
      return Rotate;
@@ -12143,6 +12206,11 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                                  Zeroable, Subtarget, DAG))
      return Shift;
  
+  // Try to use VALIGN.
+  if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v16i32, V1, V2,
+                                                  Mask, Subtarget, DAG))
+    return Rotate;
+
    // Try to use byte rotation instructions.
    if (Subtarget.hasBWI())
      if (SDValue Rotate = lowerVectorShuffleAsByteRotate(