[InstCombine] allow icmp (div X, Y), C folds for splat constant vectors

author Sanjay Patel <spatel@rotateright.com>

Wed, 31 Aug 2016 21:57:21 +0000 (21:57 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Wed, 31 Aug 2016 21:57:21 +0000 (21:57 +0000)
author Sanjay Patel <spatel@rotateright.com>
Wed, 31 Aug 2016 21:57:21 +0000 (21:57 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Wed, 31 Aug 2016 21:57:21 +0000 (21:57 +0000)
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp

index 91ea83e..a564771 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -35,11 +35,6 @@ using namespace PatternMatch;
  // How many times is a select replaced by one of its operands?
  STATISTIC(NumSel, "Number of select opts");
  
-// Initialization Routines
-
-static ConstantInt *getOne(Constant *C) {
-  return ConstantInt::get(cast<IntegerType>(C->getType()), 1);
-}
  
  static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
    return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx));
@@ -2001,33 +1996,28 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp,
    assert(!(DivIsSigned && C2->isAllOnesValue()) &&
           "The overflow computation will fail.");
  
-  // FIXME: These checks restrict all folds under here to scalar types.
-  ConstantInt *RHS = dyn_cast<ConstantInt>(Cmp.getOperand(1));
-  if (!RHS)
-    return nullptr;
-
-  ConstantInt *DivRHS = dyn_cast<ConstantInt>(Div->getOperand(1));
-  if (!DivRHS)
-    return nullptr;
+  // TODO: We could do all of the computations below using APInt.
+  Constant *CmpRHS = cast<Constant>(Cmp.getOperand(1));
+  Constant *DivRHS = cast<Constant>(Div->getOperand(1));
  
-  // Compute Prod = CI * DivRHS. We are essentially solving an equation
-  // of form X/C2=C. We solve for X by multiplying C2 (DivRHS) and
-  // C (CI). By solving for X we can turn this into a range check
-  // instead of computing a divide.
-  Constant *Prod = ConstantExpr::getMul(RHS, DivRHS);
+  // Compute Prod = CmpRHS * DivRHS. We are essentially solving an equation of
+  // form X / C2 = C. We solve for X by multiplying C2 (DivRHS) and C (CmpRHS).
+  // By solving for X, we can turn this into a range check instead of computing
+  // a divide.
+  Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
  
-  // Determine if the product overflows by seeing if the product is
-  // not equal to the divide. Make sure we do the same kind of divide
-  // as in the LHS instruction that we're folding.
-  bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
-                 ConstantExpr::getUDiv(Prod, DivRHS)) != RHS;
+  // Determine if the product overflows by checking whether dividing it by
+  // DivRHS fails to give back CmpRHS. Make sure we do the same kind of divide
+  // as in the LHS instruction that we're folding.
+  bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS)
+                             : ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
  
-  // Get the ICmp opcode
    ICmpInst::Predicate Pred = Cmp.getPredicate();
  
    // If the division is known to be exact, then there is no remainder from the
    // divide, so the covered range size is unit, otherwise it is the divisor.
-  ConstantInt *RangeSize = Div->isExact() ? getOne(Prod) : DivRHS;
+  Constant *RangeSize =
+      Div->isExact() ? ConstantInt::get(Div->getType(), 1) : DivRHS;
  
    // Figure out the interval that is being checked.  For example, a comparison
    // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
@@ -2048,7 +2038,7 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp,
        // to the same result value.
        HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false);
      }
-  } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
+  } else if (C2->isStrictlyPositive()) { // Divisor is > 0.
      if (*C == 0) {       // (X / pos) op 0
        // Can't overflow.  e.g.  X/2 op 0 --> [-1, 2)
        LoBound = ConstantExpr::getNeg(SubOne(RangeSize));
@@ -2063,17 +2053,17 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp,
        HiBound = AddOne(Prod);
        LoOverflow = HiOverflow = ProdOV ? -1 : 0;
        if (!LoOverflow) {
-        ConstantInt *DivNeg =cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+        Constant *DivNeg = ConstantExpr::getNeg(RangeSize);
          LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
        }
      }
-  } else if (DivRHS->isNegative()) { // Divisor is < 0.
+  } else if (C2->isNegative()) { // Divisor is < 0.
      if (Div->isExact())
-      RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+      RangeSize = ConstantExpr::getNeg(RangeSize);
      if (*C == 0) {       // (X / neg) op 0
        // e.g. X/-5 op 0  --> [-4, 5)
        LoBound = AddOne(RangeSize);
-      HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+      HiBound = ConstantExpr::getNeg(RangeSize);
        if (HiBound == DivRHS) {     // -INTMIN = INTMIN
          HiOverflow = 1;            // [INTMIN+1, overflow)
          HiBound = nullptr;         // e.g. X/INTMIN = 0 --> X > INTMIN
@@ -2108,9 +2098,8 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp,
          return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
                              ICmpInst::ICMP_ULT, X, HiBound);
        return replaceInstUsesWith(
-          Cmp, insertRangeTest(X, cast<ConstantInt>(LoBound)->getValue(),
-                               cast<ConstantInt>(HiBound)->getValue(),
-                               DivIsSigned, true));
+          Cmp, insertRangeTest(X, LoBound->getUniqueInteger(),
+                               HiBound->getUniqueInteger(), DivIsSigned, true));
      case ICmpInst::ICMP_NE:
        if (LoOverflow && HiOverflow)
          return replaceInstUsesWith(Cmp, Builder->getTrue());
@@ -2120,10 +2109,10 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp,
        if (LoOverflow)
          return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
                              ICmpInst::ICMP_UGE, X, HiBound);
-      return replaceInstUsesWith(
-          Cmp, insertRangeTest(X, cast<ConstantInt>(LoBound)->getValue(),
-                               cast<ConstantInt>(HiBound)->getValue(),
-                               DivIsSigned, false));
+      return replaceInstUsesWith(Cmp,
+                                 insertRangeTest(X, LoBound->getUniqueInteger(),
+                                                 HiBound->getUniqueInteger(),
+                                                 DivIsSigned, false));
      case ICmpInst::ICMP_ULT:
      case ICmpInst::ICMP_SLT:
        if (LoOverflow == +1)   // Low bound is greater than input range.
diff --git a/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll b/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll

index 69b7c54..ffcfe26 100644 (file)
--- a/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll
+++ b/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll
@@ -14,11 +14,10 @@ define i1 @test(i32 %tmp6) {
    ret i1 %1
  }
  
-; FIXME: Vectors should fold the same way.
  define <2 x i1> @test_vec(<2 x i32> %tmp6) {
  ; CHECK-LABEL: @test_vec(
-; CHECK-NEXT:    [[TMP7:%.*]] = sdiv <2 x i32> %tmp6, <i32 12, i32 12>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <2 x i32> [[TMP7]], <i32 -6, i32 -6>
+; CHECK-NEXT:    [[TMP6_OFF:%.*]] = add <2 x i32> %tmp6, <i32 83, i32 83>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <2 x i32> [[TMP6_OFF]], <i32 11, i32 11>
  ; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
  ;
    %tmp7 = sdiv <2 x i32> %tmp6, <i32 12, i32 12>
diff --git a/test/Transforms/InstCombine/div.ll b/test/Transforms/InstCombine/div.ll

index ece332a..a037607 100644 (file)
--- a/test/Transforms/InstCombine/div.ll
+++ b/test/Transforms/InstCombine/div.ll
@@ -72,12 +72,11 @@ define i1 @test7(i32 %A) {
    ret i1 %C
  }
  
-; FIXME: Vectors should fold the same way.
  define <2 x i1> @test7vec(<2 x i32> %A) {
  ; CHECK-LABEL: @test7vec(
-; CHECK-NEXT:    [[B:%.*]] = udiv <2 x i32> %A, <i32 10, i32 10>
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[B]], <i32 2, i32 2>
-; CHECK-NEXT:    ret <2 x i1> [[C]]
+; CHECK-NEXT:    [[A_OFF:%.*]] = add <2 x i32> %A, <i32 -20, i32 -20>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i32> [[A_OFF]], <i32 10, i32 10>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
  ;
    %B = udiv <2 x i32> %A, <i32 10, i32 10>
    %C = icmp eq <2 x i32> %B, <i32 2, i32 2>
@@ -95,11 +94,9 @@ define i1 @test8(i8 %A) {
    ret i1 %C
  }
  
-; FIXME: Vectors should fold the same way.
  define <2 x i1> @test8vec(<2 x i8> %A) {
  ; CHECK-LABEL: @test8vec(
-; CHECK-NEXT:    [[B:%.*]] = udiv <2 x i8> %A, <i8 123, i8 123>
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i8> [[B]], <i8 2, i8 2>
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt <2 x i8> %A, <i8 -11, i8 -11>
  ; CHECK-NEXT:    ret <2 x i1> [[C]]
  ;
    %B = udiv <2 x i8> %A, <i8 123, i8 123>
@@ -118,11 +115,9 @@ define i1 @test9(i8 %A) {
    ret i1 %C
  }
  
-; FIXME: Vectors should fold the same way.
  define <2 x i1> @test9vec(<2 x i8> %A) {
  ; CHECK-LABEL: @test9vec(
-; CHECK-NEXT:    [[B:%.*]] = udiv <2 x i8> %A, <i8 123, i8 123>
-; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i8> [[B]], <i8 2, i8 2>
+; CHECK-NEXT:    [[C:%.*]] = icmp ult <2 x i8> %A, <i8 -10, i8 -10>
  ; CHECK-NEXT:    ret <2 x i1> [[C]]
  ;
    %B = udiv <2 x i8> %A, <i8 123, i8 123>
diff --git a/test/Transforms/InstCombine/exact.ll b/test/Transforms/InstCombine/exact.ll

index 530d665..f93bc8e 100644 (file)
--- a/test/Transforms/InstCombine/exact.ll
+++ b/test/Transforms/InstCombine/exact.ll
@@ -159,8 +159,8 @@ define i1 @udiv_icmp1(i64 %X) {
  
  define <2 x i1> @udiv_icmp1_vec(<2 x i64> %X) {
  ; CHECK-LABEL: @udiv_icmp1_vec(
-; CHECK-NEXT:    [[B:%.*]] = icmp ugt <2 x i64> %X, <i64 4, i64 4>
-; CHECK-NEXT:    ret <2 x i1> [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <2 x i64> %X, zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
  ;
    %A = udiv exact <2 x i64> %X, <i64 5, i64 5>
    %B = icmp ne <2 x i64> %A, zeroinitializer
@@ -177,10 +177,11 @@ define i1 @udiv_icmp2(i64 %X) {
    ret i1 %B
  }
  
+; FIXME: missing vector fold for ult 1 -> eq 0
  define <2 x i1> @udiv_icmp2_vec(<2 x i64> %X) {
  ; CHECK-LABEL: @udiv_icmp2_vec(
-; CHECK-NEXT:    [[B:%.*]] = icmp ult <2 x i64> %X, <i64 5, i64 5>
-; CHECK-NEXT:    ret <2 x i1> [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i64> %X, <i64 1, i64 1>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
  ;
    %A = udiv exact <2 x i64> %X, <i64 5, i64 5>
    %B = icmp eq <2 x i64> %A, zeroinitializer
@@ -197,12 +198,11 @@ define i1 @sdiv_icmp1(i64 %X) {
    ret i1 %B
  }
  
-; FIXME: Vectors should fold too.
+; FIXME: missing vector fold for ult 1 -> eq 0
  define <2 x i1> @sdiv_icmp1_vec(<2 x i64> %X) {
  ; CHECK-LABEL: @sdiv_icmp1_vec(
-; CHECK-NEXT:    [[A:%.*]] = sdiv exact <2 x i64> %X, <i64 5, i64 5>
-; CHECK-NEXT:    [[B:%.*]] = icmp eq <2 x i64> [[A]], zeroinitializer
-; CHECK-NEXT:    ret <2 x i1> [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i64> %X, <i64 1, i64 1>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
  ;
    %A = sdiv exact <2 x i64> %X, <i64 5, i64 5>
    %B = icmp eq <2 x i64> %A, zeroinitializer
@@ -219,12 +219,10 @@ define i1 @sdiv_icmp2(i64 %X) {
    ret i1 %B
  }
  
-; FIXME: Vectors should fold too.
  define <2 x i1> @sdiv_icmp2_vec(<2 x i64> %X) {
  ; CHECK-LABEL: @sdiv_icmp2_vec(
-; CHECK-NEXT:    [[A:%.*]] = sdiv exact <2 x i64> %X, <i64 5, i64 5>
-; CHECK-NEXT:    [[B:%.*]] = icmp eq <2 x i64> [[A]], <i64 1, i64 1>
-; CHECK-NEXT:    ret <2 x i1> [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i64> %X, <i64 5, i64 5>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
  ;
    %A = sdiv exact <2 x i64> %X, <i64 5, i64 5>
    %B = icmp eq <2 x i64> %A, <i64 1, i64 1>
@@ -241,12 +239,10 @@ define i1 @sdiv_icmp3(i64 %X) {
    ret i1 %B
  }
  
-; FIXME: Vectors should fold too.
  define <2 x i1> @sdiv_icmp3_vec(<2 x i64> %X) {
  ; CHECK-LABEL: @sdiv_icmp3_vec(
-; CHECK-NEXT:    [[A:%.*]] = sdiv exact <2 x i64> %X, <i64 5, i64 5>
-; CHECK-NEXT:    [[B:%.*]] = icmp eq <2 x i64> [[A]], <i64 -1, i64 -1>
-; CHECK-NEXT:    ret <2 x i1> [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i64> %X, <i64 -5, i64 -5>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
  ;
    %A = sdiv exact <2 x i64> %X, <i64 5, i64 5>
    %B = icmp eq <2 x i64> %A, <i64 -1, i64 -1>
@@ -263,12 +259,11 @@ define i1 @sdiv_icmp4(i64 %X) {
    ret i1 %B
  }
  
-; FIXME: Vectors should fold too.
+; FIXME: missing vector fold for ult 1 -> eq 0
  define <2 x i1> @sdiv_icmp4_vec(<2 x i64> %X) {
  ; CHECK-LABEL: @sdiv_icmp4_vec(
-; CHECK-NEXT:    [[A:%.*]] = sdiv exact <2 x i64> %X, <i64 -5, i64 -5>
-; CHECK-NEXT:    [[B:%.*]] = icmp eq <2 x i64> [[A]], zeroinitializer
-; CHECK-NEXT:    ret <2 x i1> [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i64> %X, <i64 1, i64 1>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
  ;
    %A = sdiv exact <2 x i64> %X, <i64 -5, i64 -5>
    %B = icmp eq <2 x i64> %A, zeroinitializer
@@ -285,12 +280,10 @@ define i1 @sdiv_icmp5(i64 %X) {
    ret i1 %B
  }
  
-; FIXME: Vectors should fold too.
  define <2 x i1> @sdiv_icmp5_vec(<2 x i64> %X) {
  ; CHECK-LABEL: @sdiv_icmp5_vec(
-; CHECK-NEXT:    [[A:%.*]] = sdiv exact <2 x i64> %X, <i64 -5, i64 -5>
-; CHECK-NEXT:    [[B:%.*]] = icmp eq <2 x i64> [[A]], <i64 1, i64 1>
-; CHECK-NEXT:    ret <2 x i1> [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i64> %X, <i64 -5, i64 -5>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
  ;
    %A = sdiv exact <2 x i64> %X, <i64 -5, i64 -5>
    %B = icmp eq <2 x i64> %A, <i64 1, i64 1>
@@ -307,12 +300,10 @@ define i1 @sdiv_icmp6(i64 %X) {
    ret i1 %B
  }
  
-; FIXME: Vectors should fold too.
  define <2 x i1> @sdiv_icmp6_vec(<2 x i64> %X) {
  ; CHECK-LABEL: @sdiv_icmp6_vec(
-; CHECK-NEXT:    [[A:%.*]] = sdiv exact <2 x i64> %X, <i64 -5, i64 -5>
-; CHECK-NEXT:    [[B:%.*]] = icmp eq <2 x i64> [[A]], <i64 -1, i64 -1>
-; CHECK-NEXT:    ret <2 x i1> [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i64> %X, <i64 5, i64 5>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
  ;
    %A = sdiv exact <2 x i64> %X, <i64 -5, i64 -5>
    %B = icmp eq <2 x i64> %A, <i64 -1, i64 -1>
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll

index 93e0cdf..9ad8250 100644 (file)
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -282,11 +282,9 @@ define i1 @test23(i32 %x) {
    ret i1 %i4
  }
  
-; FIXME: Vectors should fold too.
  define <2 x i1> @test23vec(<2 x i32> %x) {
  ; CHECK-LABEL: @test23vec(
-; CHECK-NEXT:    [[I3:%.*]] = sdiv <2 x i32> %x, <i32 -1328634635, i32 -1328634635>
-; CHECK-NEXT:    [[I4:%.*]] = icmp eq <2 x i32> [[I3]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[I4:%.*]] = icmp sgt <2 x i32> %x, <i32 1328634634, i32 1328634634>
  ; CHECK-NEXT:    ret <2 x i1> [[I4]]
  ;
    %i3 = sdiv <2 x i32> %x, <i32 -1328634635, i32 -1328634635>
author	Sanjay Patel <spatel@rotateright.com>
	Wed, 31 Aug 2016 21:57:21 +0000 (21:57 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Wed, 31 Aug 2016 21:57:21 +0000 (21:57 +0000)
lib/Transforms/InstCombine/InstCombineCompares.cpp		patch \| blob \| history
test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll		patch \| blob \| history
test/Transforms/InstCombine/div.ll		patch \| blob \| history
test/Transforms/InstCombine/exact.ll		patch \| blob \| history
test/Transforms/InstCombine/icmp.ll		patch \| blob \| history