[InstCombine] fold shuffles of insert_subvectors

author Sanjay Patel <spatel@rotateright.com>

Wed, 22 May 2019 00:32:25 +0000 (00:32 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Wed, 22 May 2019 00:32:25 +0000 (00:32 +0000)
author Sanjay Patel <spatel@rotateright.com>
Wed, 22 May 2019 00:32:25 +0000 (00:32 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Wed, 22 May 2019 00:32:25 +0000 (00:32 +0000)
diff --git a/docs/CodeGenerator.rst b/docs/CodeGenerator.rst

index e7f96e8..4753acd 100644 (file)
--- a/docs/CodeGenerator.rst
+++ b/docs/CodeGenerator.rst
@@ -912,6 +912,31 @@ A target implementation tells the legalizer which operations are not supported
  (and which of the above three actions to take) by calling the
  ``setOperationAction`` method in its ``TargetLowering`` constructor.
  
+If a target has legal vector types, it is expected to produce efficient machine
+code for common forms of the shufflevector IR instruction using those types.
+This may require custom legalization for SelectionDAG vector operations that
+are created from the shufflevector IR. The shufflevector forms that should be
+handled include:
+
+* Vector select --- Each element of the vector is chosen from either of the
+corresponding elements of the 2 input vectors. This operation may also be
+known as a "blend" or "bitwise select" in target assembly. This type of shuffle
+maps directly to the ``shuffle_vector`` SelectionDAG node.
+
+* Insert subvector --- A vector is placed into a longer vector type starting
+at index 0. This type of shuffle maps directly to the ``insert_subvector``
+SelectionDAG node with the ``index`` operand set to 0.
+
+* Extract subvector --- A vector is pulled from a longer vector type starting
+at index 0. This type of shuffle maps directly to the ``extract_subvector``
+SelectionDAG node with the ``index`` operand set to 0.
+
+* Splat --- All elements of the vector have identical scalar elements. This
+operation may also be known as a "broadcast" or "duplicate" in target assembly.
+The shufflevector IR instruction may change the vector length, so this operation
+may map to multiple SelectionDAG nodes including ``shuffle_vector``,
+``concat_vectors``, ``insert_subvector``, and ``extract_subvector``.
+
  Prior to the existence of the Legalize passes, we required that every target
  `selector`_ supported and handled every operator and type even if they are not
  natively supported.  The introduction of the Legalize phases allows all of the
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

index e76f41b..ecc4df1 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1622,6 +1622,55 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf) {
    return nullptr;
  }
  
+static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
+  // Match the operands as identity with padding (also known as concatenation
+  // with undef) shuffles of the same source type. The backend is expected to
+  // recreate these concatenations from a shuffle of narrow operands.
+  auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
+  auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
+  if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
+      !Shuffle1 || !Shuffle1->isIdentityWithPadding())
+    return nullptr;
+
+  // We limit this transform to power-of-2 types because we expect that the
+  // backend can convert the simplified IR patterns to identical nodes as the
+  // original IR.
+  // TODO: If we can verify that behavior for arbitrary types, the power-of-2
+  // checks can be removed.
+  Value *X = Shuffle0->getOperand(0);
+  Value *Y = Shuffle1->getOperand(0);
+  if (X->getType() != Y->getType() ||
+      !isPowerOf2_32(Shuf.getType()->getVectorNumElements()) ||
+      !isPowerOf2_32(Shuffle0->getType()->getVectorNumElements()) ||
+      !isPowerOf2_32(X->getType()->getVectorNumElements()) ||
+      isa<UndefValue>(X) || isa<UndefValue>(Y))
+    return nullptr;
+  assert(isa<UndefValue>(Shuffle0->getOperand(1)) &&
+         isa<UndefValue>(Shuffle1->getOperand(1)) &&
+         "Unexpected operand for identity shuffle");
+
+  // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
+  // operands directly by adjusting the shuffle mask to account for the narrower
+  // types:
+  // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
+  int NarrowElts = X->getType()->getVectorNumElements();
+  int WideElts = Shuffle0->getType()->getVectorNumElements();
+  assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
+
+  Type *I32Ty = IntegerType::getInt32Ty(Shuf.getContext());
+  SmallVector<int, 16> Mask = Shuf.getShuffleMask();
+  SmallVector<Constant *, 16> NewMask(Mask.size(), UndefValue::get(I32Ty));
+  for (int i = 0, e = Mask.size(); i != e; ++i) {
+    if (Mask[i] == -1)
+      continue;
+    if (Mask[i] < WideElts)
+      NewMask[i] = ConstantInt::get(I32Ty, Mask[i]);
+    else
+      NewMask[i] = ConstantInt::get(I32Ty, Mask[i] - (WideElts - NarrowElts));
+  }
+  return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
+}
+
  Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
    Value *LHS = SVI.getOperand(0);
    Value *RHS = SVI.getOperand(1);
@@ -1676,10 +1725,12 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
    if (Instruction *I = foldIdentityExtractShuffle(SVI))
      return I;
  
-  // This transform has the potential to lose undef knowledge, so it is
+  // These transforms have the potential to lose undef knowledge, so they are
    // intentionally placed after SimplifyDemandedVectorElts().
    if (Instruction *I = foldShuffleWithInsert(SVI))
      return I;
+  if (Instruction *I = foldIdentityPaddedShuffles(SVI))
+    return I;
  
    if (VWidth == LHSWidth) {
      // Analyze the shuffle, are the LHS or RHS and identity shuffles?
diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll

index 821c082..3dcd7d7 100644 (file)
--- a/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/test/Transforms/InstCombine/vec_shuffle.ll
@@ -1140,6 +1140,8 @@ define <2 x i1> @PR40734(<1 x i1> %x, <4 x i1> %y) {
    ret <2 x i1> %r
  }
  
+; Negative test - do not transform non-power-of-2 unless we know the backend handles these sequences identically.
+
  define <7 x i8> @insert_subvector_shuffles(<3 x i8> %x, <3 x i8> %y) {
  ; CHECK-LABEL: @insert_subvector_shuffles(
  ; CHECK-NEXT:    [[S1:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> undef, <7 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -1155,9 +1157,7 @@ define <7 x i8> @insert_subvector_shuffles(<3 x i8> %x, <3 x i8> %y) {
  
  define <8 x i8> @insert_subvector_shuffles_pow2elts(<2 x i8> %x, <2 x i8> %y) {
  ; CHECK-LABEL: @insert_subvector_shuffles_pow2elts(
-; CHECK-NEXT:    [[S1:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[S2:%.*]] = shufflevector <2 x i8> [[Y:%.*]], <2 x i8> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[S3:%.*]] = shufflevector <8 x i8> [[S1]], <8 x i8> [[S2]], <8 x i32> <i32 0, i32 8, i32 1, i32 undef, i32 8, i32 1, i32 9, i32 0>
+; CHECK-NEXT:    [[S3:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <8 x i32> <i32 0, i32 2, i32 1, i32 undef, i32 2, i32 1, i32 3, i32 0>
  ; CHECK-NEXT:    ret <8 x i8> [[S3]]
  ;
    %s1 = shufflevector <2 x i8> %x, <2 x i8> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -1167,6 +1167,7 @@ define <8 x i8> @insert_subvector_shuffles_pow2elts(<2 x i8> %x, <2 x i8> %y) {
  }
  
  ; The last shuffle may change the vector type.
+; Negative test - do not transform non-power-of-2 unless we know the backend handles these sequences identically.
  
  define <2 x i8> @insert_subvector_shuffles_narrowing(<3 x i8> %x, <3 x i8> %y) {
  ; CHECK-LABEL: @insert_subvector_shuffles_narrowing(
@@ -1183,9 +1184,7 @@ define <2 x i8> @insert_subvector_shuffles_narrowing(<3 x i8> %x, <3 x i8> %y) {
  
  define <2 x i8> @insert_subvector_shuffles_narrowing_pow2elts(<4 x i8> %x, <4 x i8> %y) {
  ; CHECK-LABEL: @insert_subvector_shuffles_narrowing_pow2elts(
-; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[S2:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[S3:%.*]] = shufflevector <8 x i8> [[S1]], <8 x i8> [[S2]], <2 x i32> <i32 0, i32 8>
+; CHECK-NEXT:    [[S3:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <2 x i32> <i32 0, i32 4>
  ; CHECK-NEXT:    ret <2 x i8> [[S3]]
  ;
    %s1 = shufflevector <4 x i8> %x, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -1198,9 +1197,7 @@ define <2 x i8> @insert_subvector_shuffles_narrowing_pow2elts(<4 x i8> %x, <4 x
  
  define <4 x double> @insert_subvector_shuffles_identity(<2 x double> %x) {
  ; CHECK-LABEL: @insert_subvector_shuffles_identity(
-; CHECK-NEXT:    [[S1:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <4 x i32> <i32 undef, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT:    [[S2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[S3:%.*]] = shufflevector <4 x double> [[S2]], <4 x double> [[S1]], <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
+; CHECK-NEXT:    [[S3:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ; CHECK-NEXT:    ret <4 x double> [[S3]]
  ;
    %s1 = shufflevector <2 x double> %x, <2 x double> undef, <4 x i32> <i32 undef, i32 1, i32 undef, i32 undef>
author	Sanjay Patel <spatel@rotateright.com>
	Wed, 22 May 2019 00:32:25 +0000 (00:32 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Wed, 22 May 2019 00:32:25 +0000 (00:32 +0000)
docs/CodeGenerator.rst		patch \| blob \| history
lib/Transforms/InstCombine/InstCombineVectorOps.cpp		patch \| blob \| history
test/Transforms/InstCombine/vec_shuffle.ll		patch \| blob \| history