[InstCombine] Support vector splats in transformZExtICmp

author Craig Topper <craig.topper@intel.com>

Tue, 29 Aug 2017 18:58:13 +0000 (18:58 +0000)

committer Craig Topper <craig.topper@intel.com>

Tue, 29 Aug 2017 18:58:13 +0000 (18:58 +0000)
author Craig Topper <craig.topper@intel.com>
Tue, 29 Aug 2017 18:58:13 +0000 (18:58 +0000)
committer Craig Topper <craig.topper@intel.com>
Tue, 29 Aug 2017 18:58:13 +0000 (18:58 +0000)
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp

index 2fb5ce3..f7be0f9 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -772,13 +772,13 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
    // If we are just checking for a icmp eq of a single bit and zext'ing it
    // to an integer, then shift the bit to the appropriate place and then
    // cast to integer to avoid the comparison.
-  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
-    const APInt &Op1CV = Op1C->getValue();
+  const APInt *Op1CV;
+  if (match(ICI->getOperand(1), m_APInt(Op1CV))) {
  
      // zext (x <s  0) to i32 --> x>>u31      true if signbit set.
      // zext (x >s -1) to i32 --> (x>>u31)^1  true if signbit clear.
-    if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV.isNullValue()) ||
-        (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
+    if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isNullValue()) ||
+        (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnesValue())) {
        if (!DoTransform) return ICI;
  
        Value *In = ICI->getOperand(0);
@@ -804,7 +804,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
      // zext (X != 0) to i32 --> X>>1     iff X has only the 2nd bit set.
      // zext (X != 1) to i32 --> X^1      iff X has only the low bit set.
      // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
-    if ((Op1CV.isNullValue() || Op1CV.isPowerOf2()) &&
+    if ((Op1CV->isNullValue() || Op1CV->isPowerOf2()) &&
          // This only works for EQ and NE
          ICI->isEquality()) {
        // If Op1C some other power of two, convert:
@@ -815,7 +815,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
          if (!DoTransform) return ICI;
  
          bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
-        if (!Op1CV.isNullValue() && (Op1CV != KnownZeroMask)) {
+        if (!Op1CV->isNullValue() && (*Op1CV != KnownZeroMask)) {
            // (X&4) == 2 --> false
            // (X&4) != 2 --> true
            Constant *Res = ConstantInt::get(Type::getInt1Ty(CI.getContext()),
@@ -833,7 +833,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
                                    In->getName() + ".lobit");
          }
  
-        if (!Op1CV.isNullValue() == isNE) { // Toggle the low bit.
+        if (!Op1CV->isNullValue() == isNE) { // Toggle the low bit.
            Constant *One = ConstantInt::get(In->getType(), 1);
            In = Builder.CreateXor(In, One);
          }
diff --git a/test/Transforms/InstCombine/compare-signs.ll b/test/Transforms/InstCombine/compare-signs.ll

index fd59b84..5d69d35 100644 (file)
--- a/test/Transforms/InstCombine/compare-signs.ll
+++ b/test/Transforms/InstCombine/compare-signs.ll
@@ -48,6 +48,22 @@ define i32 @test3(i32 %a, i32 %b) nounwind readnone {
    ret i32 %t3
  }
  
+; TODO this should optimize but doesn't due to missing vector support in InstCombiner::foldICmpEquality.
+define <2 x i32> @test3vec(<2 x i32> %a, <2 x i32> %b) nounwind readnone {
+; CHECK-LABEL: @test3vec(
+; CHECK-NEXT:    [[T0:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[T1:%.*]] = lshr <2 x i32> [[B:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[T2:%.*]] = icmp eq <2 x i32> [[T0]], [[T1]]
+; CHECK-NEXT:    [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32>
+; CHECK-NEXT:    ret <2 x i32> [[T3]]
+;
+  %t0 = lshr <2 x i32> %a, <i32 31, i32 31>
+  %t1 = lshr <2 x i32> %b, <i32 31, i32 31>
+  %t2 = icmp eq <2 x i32> %t0, %t1
+  %t3 = zext <2 x i1> %t2 to <2 x i32>
+  ret <2 x i32> %t3
+}
+
  ; Variation on @test3: checking the 2nd bit in a situation where the 5th bit
  ; is one, not zero.
  define i32 @test3i(i32 %a, i32 %b) nounwind readnone {
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll

index a12f420..d0ee8e0 100644 (file)
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -13,6 +13,16 @@ define i32 @test1(i32 %X) {
    ret i32 %b
  }
  
+define <2 x i32> @test1vec(<2 x i32> %X) {
+; CHECK-LABEL: @test1vec(
+; CHECK-NEXT:    [[X_LOBIT:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    ret <2 x i32> [[X_LOBIT]]
+;
+  %a = icmp slt <2 x i32> %X, zeroinitializer
+  %b = zext <2 x i1> %a to <2 x i32>
+  ret <2 x i32> %b
+}
+
  define i32 @test2(i32 %X) {
  ; CHECK-LABEL: @test2(
  ; CHECK-NEXT:    [[X_LOBIT:%.*]] = lshr i32 %X, 31
@@ -24,6 +34,17 @@ define i32 @test2(i32 %X) {
    ret i32 %b
  }
  
+define <2 x i32> @test2vec(<2 x i32> %X) {
+; CHECK-LABEL: @test2vec(
+; CHECK-NEXT:    [[X_LOBIT:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[X_LOBIT_NOT:%.*]] = xor <2 x i32> [[X_LOBIT]], <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i32> [[X_LOBIT_NOT]]
+;
+  %a = icmp ult <2 x i32> %X, <i32 -2147483648, i32 -2147483648>
+  %b = zext <2 x i1> %a to <2 x i32>
+  ret <2 x i32> %b
+}
+
  define i32 @test3(i32 %X) {
  ; CHECK-LABEL: @test3(
  ; CHECK-NEXT:    [[X_LOBIT:%.*]] = ashr i32 %X, 31
diff --git a/test/Transforms/InstCombine/set.ll b/test/Transforms/InstCombine/set.ll

index f9f48cf..ebd8a43 100644 (file)
--- a/test/Transforms/InstCombine/set.ll
+++ b/test/Transforms/InstCombine/set.ll
@@ -282,6 +282,17 @@ define i32 @test20(i32 %A) {
    ret i32 %D
  }
  
+define <2 x i32> @test20vec(<2 x i32> %A) {
+; CHECK-LABEL: @test20vec(
+; CHECK-NEXT:    [[B:%.*]] = and <2 x i32> [[A:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i32> [[B]]
+;
+  %B = and <2 x i32> %A, <i32 1, i32 1>
+  %C = icmp ne <2 x i32> %B, zeroinitializer
+  %D = zext <2 x i1> %C to <2 x i32>
+  ret <2 x i32> %D
+}
+
  define i32 @test21(i32 %a) {
  ; CHECK-LABEL: @test21(
  ; CHECK-NEXT:    [[TMP_6:%.*]] = lshr i32 %a, 2
@@ -294,6 +305,18 @@ define i32 @test21(i32 %a) {
    ret i32 %retval
  }
  
+define <2 x i32> @test21vec(<2 x i32> %a) {
+; CHECK-LABEL: @test21vec(
+; CHECK-NEXT:    [[TMP_6:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[TMP_6_LOBIT:%.*]] = and <2 x i32> [[TMP_6]], <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i32> [[TMP_6_LOBIT]]
+;
+  %tmp.6 = and <2 x i32> %a, <i32 4, i32 4>
+  %not.tmp.7 = icmp ne <2 x i32> %tmp.6, zeroinitializer
+  %retval = zext <2 x i1> %not.tmp.7 to <2 x i32>
+  ret <2 x i32> %retval
+}
+
  define i1 @test22(i32 %A, i32 %X) {
  ; CHECK-LABEL: @test22(
  ; CHECK-NEXT:    ret i1 true
@@ -318,6 +341,18 @@ define i32 @test23(i32 %a) {
    ret i32 %tmp.3
  }
  
+define <2 x i32> @test23vec(<2 x i32> %a) {
+; CHECK-LABEL: @test23vec(
+; CHECK-NEXT:    [[TMP_1:%.*]] = and <2 x i32> [[A:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[TMP_1]], <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
+;
+  %tmp.1 = and <2 x i32> %a, <i32 1, i32 1>
+  %tmp.2 = icmp eq <2 x i32> %tmp.1, zeroinitializer
+  %tmp.3 = zext <2 x i1> %tmp.2 to <2 x i32>
+  ret <2 x i32> %tmp.3
+}
+
  define i32 @test24(i32 %a) {
  ; CHECK-LABEL: @test24(
  ; CHECK-NEXT:    [[TMP_1:%.*]] = lshr i32 %a, 2
@@ -332,6 +367,20 @@ define i32 @test24(i32 %a) {
    ret i32 %tmp.3
  }
  
+define <2 x i32> @test24vec(<2 x i32> %a) {
+; CHECK-LABEL: @test24vec(
+; CHECK-NEXT:    [[TMP_1:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[TMP_1_LOBIT:%.*]] = and <2 x i32> [[TMP_1]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[TMP_1_LOBIT]], <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
+;
+  %tmp1 = and <2 x i32> %a, <i32 4, i32 4>
+  %tmp.1 = lshr <2 x i32> %tmp1, <i32 2, i32 2>
+  %tmp.2 = icmp eq <2 x i32> %tmp.1, zeroinitializer
+  %tmp.3 = zext <2 x i1> %tmp.2 to <2 x i32>
+  ret <2 x i32> %tmp.3
+}
+
  define i1 @test25(i32 %A) {
  ; CHECK-LABEL: @test25(
  ; CHECK-NEXT:    ret i1 false
author	Craig Topper <craig.topper@intel.com>
	Tue, 29 Aug 2017 18:58:13 +0000 (18:58 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Tue, 29 Aug 2017 18:58:13 +0000 (18:58 +0000)
lib/Transforms/InstCombine/InstCombineCasts.cpp		patch | blob | history
test/Transforms/InstCombine/compare-signs.ll		patch | blob | history
test/Transforms/InstCombine/icmp.ll		patch | blob | history
test/Transforms/InstCombine/set.ll		patch | blob | history