From 66b84ccb05ef8b029844ed51f8355111aed991b1 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Wed, 24 Aug 2016 22:22:06 +0000
Subject: [PATCH] [InstCombine] use m_APInt to allow icmp eq/ne (shr X, C2), C
 folds for splat constant vectors

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@279677 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/InstCombine/InstCombineCompares.cpp | 35 ++++++++++++----------
 test/Transforms/InstCombine/apint-shift.ll         | 13 ++++----
 test/Transforms/InstCombine/cast.ll                |  4 +--
 test/Transforms/InstCombine/icmp.ll                | 20 +++++++++++++
 test/Transforms/InstCombine/shift.ll               | 17 ++++-------
 5 files changed, 51 insertions(+), 38 deletions(-)

diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 0d23ea0a948..e1b23500bd8 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1988,17 +1988,14 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp,
   if (Cmp.isEquality() && Shr->isExact() && Shr->hasOneUse() && *C == 0)
     return new ICmpInst(Pred, X, Cmp.getOperand(1));

-  // FIXME: This check restricts all folds under here to scalar types.
-  // Handle equality comparisons of shift-by-constant.
-  ConstantInt *ShAmt = dyn_cast<ConstantInt>(Shr->getOperand(1));
-  if (!ShAmt)
+  const APInt *ShiftAmt;
+  if (!match(Shr->getOperand(1), m_APInt(ShiftAmt)))
     return nullptr;

-  // Check that the shift amount is in range. If not, don't perform
-  // undefined shifts. When the shift is visited it will be
-  // simplified.
-  uint32_t TypeBits = C->getBitWidth();
-  uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+  // Check that the shift amount is in range. If not, don't perform undefined
+  // shifts. When the shift is visited it will be simplified.
+  unsigned TypeBits = C->getBitWidth();
+  unsigned ShAmtVal = ShiftAmt->getLimitedValue(TypeBits);
   if (ShAmtVal >= TypeBits || ShAmtVal == 0)
     return nullptr;

@@ -2015,6 +2012,11 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp,
   if (IsAShr && (!Shr->isExact() || ShAmtVal == TypeBits - 1))
     return nullptr;

+  // FIXME: This check restricts this fold to scalar types.
+  ConstantInt *ShAmt = dyn_cast<ConstantInt>(Shr->getOperand(1));
+  if (!ShAmt)
+    return nullptr;
+
   // Revisit the shift (to delete it).
   Worklist.Add(Shr);

@@ -2041,6 +2043,8 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp,
       return Res;
   }

+  // Handle equality comparisons of shift-by-constant.
+
   // If the comparison constant changes with the shift, the comparison cannot
   // succeed (bits of the comparison constant cannot match the shifted value).
   // This should be known by InstSimplify and already be folded to true/false.
@@ -2051,15 +2055,14 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp,
   // Check if the bits shifted out are known to be zero. If so, we can compare
   // against the unshifted value:
   //  (X & 4) >> 1 == 2 --> (X & 4) == 4.
-  ConstantInt *ShiftedCmpRHS = Builder->getInt(*C << ShAmtVal);
-  if (Shr->hasOneUse() && Shr->isExact())
-    return new ICmpInst(Pred, X, ShiftedCmpRHS);
-
+  Constant *ShiftedCmpRHS = ConstantInt::get(Shr->getType(), *C << ShAmtVal);
   if (Shr->hasOneUse()) {
-    // Otherwise strength reduce the shift into an and.
-    APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
-    Constant *Mask = Builder->getInt(Val);
+    if (Shr->isExact())
+      return new ICmpInst(Pred, X, ShiftedCmpRHS);
+    // Otherwise strength reduce the shift into an 'and'.
+    APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
+    Constant *Mask = ConstantInt::get(Shr->getType(), Val);
     Value *And = Builder->CreateAnd(X, Mask, Shr->getName() + ".mask");
     return new ICmpInst(Pred, And, ShiftedCmpRHS);
   }

diff --git a/test/Transforms/InstCombine/apint-shift.ll b/test/Transforms/InstCombine/apint-shift.ll
index 01d83f35af3..3e1699ab158 100644
--- a/test/Transforms/InstCombine/apint-shift.ll
+++ b/test/Transforms/InstCombine/apint-shift.ll
@@ -230,11 +230,10 @@ define i1 @test17(i106 %A) {
   ret i1 %C
 }

-; FIXME: Vectors should fold too.
 define <2 x i1> @test17vec(<2 x i106> %A) {
 ; CHECK-LABEL: @test17vec(
-; CHECK-NEXT:    [[B:%.*]] = lshr <2 x i106> %A,
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i106> [[B]],
+; CHECK-NEXT:    [[B_MASK:%.*]] = and <2 x i106> %A,
+; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i106> [[B_MASK]],
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %B = lshr <2 x i106> %A,
@@ -261,11 +260,9 @@ define i1 @test19(i37 %A) {
   ret i1 %C
 }

-; FIXME: Vectors should fold too.
 define <2 x i1> @test19vec(<2 x i37> %A) {
 ; CHECK-LABEL: @test19vec(
-; CHECK-NEXT:    [[B:%.*]] = ashr <2 x i37> %A,
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i37> [[B]], zeroinitializer
+; CHECK-NEXT:    [[C:%.*]] = icmp ult <2 x i37> %A,
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %B = ashr <2 x i37> %A,
@@ -286,8 +283,8 @@ define i1 @test19a(i39 %A) {
 ; FIXME: Vectors should fold too.
 define <2 x i1> @test19a_vec(<2 x i39> %A) {
 ; CHECK-LABEL: @test19a_vec(
-; CHECK-NEXT:    [[B:%.*]] = ashr <2 x i39> %A,
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i39> [[B]],
+; CHECK-NEXT:    [[B_MASK:%.*]] = and <2 x i39> %A,
+; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i39> [[B_MASK]],
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %B = ashr <2 x i39> %A,
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index 9837ca9eefb..b4f14e115b6 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -414,11 +414,9 @@ define i1 @test36(i32 %a) {
   ret i1 %d
 }

-; FIXME: The trunc is removed, but the icmp+lshr fold is missing.
 define <2 x i1> @test36vec(<2 x i32> %a) {
 ; CHECK-LABEL: @test36vec(
-; CHECK-NEXT:    [[B:%.*]] = lshr <2 x i32> %a,
-; CHECK-NEXT:    [[D:%.*]] = icmp eq <2 x i32> [[B]], zeroinitializer
+; CHECK-NEXT:    [[D:%.*]] = icmp sgt <2 x i32> %a,
 ; CHECK-NEXT:    ret <2 x i1> [[D]]
 ;
   %b = lshr <2 x i32> %a,
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 780529c9de6..a8d55b13c26 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -518,6 +518,26 @@ define <2 x i1> @test40vec(<2 x i32> %X, <2 x i32> %Y) {
   ret <2 x i1> %B
 }

+define i1 @shr_exact(i132 %x) {
+; CHECK-LABEL: @shr_exact(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i132 %x, 32
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %sh = ashr exact i132 %x, 4
+  %cmp = icmp eq i132 %sh, 2
+  ret i1 %cmp
+}
+
+define <2 x i1> @shr_exact_vec(<2 x i132> %x) {
+; CHECK-LABEL: @shr_exact_vec(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i132> %x,
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %sh = lshr exact <2 x i132> %x,
+  %cmp = icmp ne <2 x i132> %sh,
+  ret <2 x i1> %cmp
+}
+
 ; PR9343 #3
 define i1 @test41(i32 %X, i32 %Y) {
 ; CHECK-LABEL: @test41(
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index 537e5cd951d..a91038f9eff 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -320,11 +320,10 @@ define i1 @test17(i32 %A) {
   ret i1 %C
 }

-; FIXME: Vectors should fold the same way.
 define <2 x i1> @test17vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test17vec(
-; CHECK-NEXT:    [[B:%.*]] = lshr <2 x i32> %A,
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[B]],
+; CHECK-NEXT:    [[B_MASK:%.*]] = and <2 x i32> %A,
+; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[B_MASK]],
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %B = lshr <2 x i32> %A,
@@ -353,11 +352,9 @@ define i1 @test19(i32 %A) {
   ret i1 %C
 }

-; FIXME: Vectors should fold the same way.
 define <2 x i1> @test19vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test19vec(
-; CHECK-NEXT:    [[B:%.*]] = ashr <2 x i32> %A,
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[B]], zeroinitializer
+; CHECK-NEXT:    [[C:%.*]] = icmp ult <2 x i32> %A,
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %B = ashr <2 x i32> %A,
@@ -379,8 +376,8 @@ define i1 @test19a(i32 %A) {
 ; FIXME: Vectors should fold the same way.
 define <2 x i1> @test19a_vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test19a_vec(
-; CHECK-NEXT:    [[B:%.*]] = ashr <2 x i32> %A,
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[B]],
+; CHECK-NEXT:    [[B_MASK:%.*]] = and <2 x i32> %A,
+; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[B_MASK]],
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %B = ashr <2 x i32> %A,
@@ -506,11 +503,9 @@ define i1 @test28(i8 %x) {
   ret i1 %cmp
 }

-; FIXME: Vectors should fold the same way.
 define <2 x i1> @test28vec(<2 x i8> %x) {
 ; CHECK-LABEL: @test28vec(
-; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i8> %x,
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i8> [[SHR]], zeroinitializer
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> %x, zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %shr = lshr <2 x i8> %x,
-- 
2.11.0
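
Illustrative note (not part of the patch): the splat-vector case that this change enables looks roughly like the IR below. The function name and constants are invented for illustration; the shape of the result follows the strength reduction checked in the tests above, where a non-exact lshr feeding an equality compare becomes an 'and' that keeps the high bits, compared against the left-shifted constant.

; Hypothetical input:
define <2 x i1> @splat_example(<2 x i8> %x) {
  %sh = lshr <2 x i8> %x, <i8 2, i8 2>
  %cmp = icmp eq <2 x i8> %sh, <i8 3, i8 3>
  ret <2 x i1> %cmp
}

; Expected shape after this patch (mask keeps the 6 high bits, i.e. -4; 3 << 2 == 12):
define <2 x i1> @splat_example(<2 x i8> %x) {
  %sh.mask = and <2 x i8> %x, <i8 -4, i8 -4>
  %cmp = icmp eq <2 x i8> %sh.mask, <i8 12, i8 12>
  ret <2 x i1> %cmp
}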