[x86] Fix PR22706 where we would incorrectly try lower a v32i8 dynamic

author Chandler Carruth <chandlerc@gmail.com>

Thu, 26 Feb 2015 22:15:34 +0000 (22:15 +0000)

committer Chandler Carruth <chandlerc@gmail.com>

Thu, 26 Feb 2015 22:15:34 +0000 (22:15 +0000)
author Chandler Carruth <chandlerc@gmail.com>
Thu, 26 Feb 2015 22:15:34 +0000 (22:15 +0000)
committer Chandler Carruth <chandlerc@gmail.com>
Thu, 26 Feb 2015 22:15:34 +0000 (22:15 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 656b32e..ecf0857 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -10126,24 +10126,31 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
    if (!Subtarget->hasSSE41())
      return SDValue();
  
-  // Some types for vselect were previously set to Expand, not Legal or
-  // Custom. Return an empty SDValue so we fall-through to Expand, after
-  // the Custom lowering phase.
-  MVT VT = Op.getSimpleValueType();
-  switch (VT.SimpleTy) {
+  // Only some types will be legal on some subtargets. If we can emit a legal
+  // VSELECT-matching blend, return Op, and but if we need to expand, return
+  // a null value.
+  switch (Op.getSimpleValueType().SimpleTy) {
    default:
-    break;
+    // Most of the vector types have blends past SSE4.1.
+    return Op;
+
+  case MVT::v32i8:
+    // The byte blends for AVX vectors were introduced only in AVX2.
+    if (Subtarget->hasAVX2())
+      return Op;
+
+    return SDValue();
+
    case MVT::v8i16:
    case MVT::v16i16:
+    // AVX-512 BWI and VLX features support VSELECT with i16 elements.
      if (Subtarget->hasBWI() && Subtarget->hasVLX())
-      break;
+      return Op;
+
+    // FIXME: We should custom lower this by fixing the condition and using i8
+    // blends.
      return SDValue();
    }
-
-  // We couldn't create a "Blend with immediate" node.
-  // This node should still be legal, but we'll have to emit a blendv*
-  // instruction.
-  return Op;
  }
  
  static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
@@ -20784,7 +20791,17 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
      // lowered.
      if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
        return SDValue();
-    if (!Subtarget->hasSSE41() || VT == MVT::v16i16 || VT == MVT::v8i16)
+    // FIXME: We don't support i16-element blends currently. We could and
+    // should support them by making *all* the bits in the condition be set
+    // rather than just the high bit and using an i8-element blend.
+    if (VT.getScalarType() == MVT::i16)
+      return SDValue();
+    // Dynamic blending was only available from SSE4.1 onward.
+    if (VT.getSizeInBits() == 128 && !Subtarget->hasSSE41())
+      return SDValue();
+    // Byte blends are only available in AVX2
+    if (VT.getSizeInBits() == 256 && VT.getScalarType() == MVT::i8 &&
+        !Subtarget->hasAVX2())
        return SDValue();
  
      assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
diff --git a/test/CodeGen/X86/vselect-avx.ll b/test/CodeGen/X86/vselect-avx.ll

index 26b00db..02a9ef4 100644 (file)
--- a/test/CodeGen/X86/vselect-avx.ll
+++ b/test/CodeGen/X86/vselect-avx.ll
@@ -79,3 +79,14 @@ define void @test3(<4 x i32> %induction30, <4 x i16>* %tmp16, <4 x i16>* %tmp17,
    store <4 x i16> %predphi, <4 x i16>* %tmp17, align 8
   ret void
  }
+
+; We shouldn't try to lower this directly using VSELECT because we don't have
+; vpblendvb in AVX1, only in AVX2. Instead, it should be expanded.
+;
+; CHECK-LABEL: PR22706:
+; CHECK: vpcmpgtb
+; CHECK: vpcmpgtb
+define <32 x i8> @PR22706(<32 x i1> %x) {
+  %tmp = select <32 x i1> %x, <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <32 x i8> <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+  ret <32 x i8> %tmp
+}
author	Chandler Carruth <chandlerc@gmail.com>
	Thu, 26 Feb 2015 22:15:34 +0000 (22:15 +0000)
committer	Chandler Carruth <chandlerc@gmail.com>
	Thu, 26 Feb 2015 22:15:34 +0000 (22:15 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/X86/vselect-avx.ll		patch \| blob \| history