[InstCombine] Add range metadata to cttz/ctlz/ctpop intrinsic calls based on known...

author Craig Topper <craig.topper@intel.com>

Wed, 21 Jun 2017 16:32:35 +0000 (16:32 +0000)

committer Craig Topper <craig.topper@intel.com>

Wed, 21 Jun 2017 16:32:35 +0000 (16:32 +0000)
author Craig Topper <craig.topper@intel.com>
Wed, 21 Jun 2017 16:32:35 +0000 (16:32 +0000)
committer Craig Topper <craig.topper@intel.com>
Wed, 21 Jun 2017 16:32:35 +0000 (16:32 +0000)
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp

index c0830a5..dbed7ad 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1409,6 +1409,47 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
      }
    }
  
+  // Add range metadata since known bits can't completely reflect what we know.
+  // TODO: Handle splat vectors.
+  auto *IT = dyn_cast<IntegerType>(Op0->getType());
+  if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
+    Metadata *LowAndHigh[] = {
+        ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
+        ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
+    II.setMetadata(LLVMContext::MD_range,
+                   MDNode::get(II.getContext(), LowAndHigh));
+    return &II;
+  }
+
+  return nullptr;
+}
+
+static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {
+  assert(II.getIntrinsicID() == Intrinsic::ctpop &&
+         "Expected ctpop intrinsic");
+  Value *Op0 = II.getArgOperand(0);
+  // FIXME: Try to simplify vectors of integers.
+  auto *IT = dyn_cast<IntegerType>(Op0->getType());
+  if (!IT)
+    return nullptr;
+
+  unsigned BitWidth = IT->getBitWidth();
+  KnownBits Known(BitWidth);
+  IC.computeKnownBits(Op0, Known, 0, &II);
+
+  unsigned MinCount = Known.countMinPopulation();
+  unsigned MaxCount = Known.countMaxPopulation();
+
+  // Add range metadata since known bits can't completely reflect what we know.
+  if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
+    Metadata *LowAndHigh[] = {
+        ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
+        ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
+    II.setMetadata(LLVMContext::MD_range,
+                   MDNode::get(II.getContext(), LowAndHigh));
+    return &II;
+  }
+
    return nullptr;
  }
  
@@ -1981,6 +2022,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
        return I;
      break;
  
+  case Intrinsic::ctpop:
+    if (auto *I = foldCtpop(*II, *this))
+      return I;
+    break;
+
    case Intrinsic::uadd_with_overflow:
    case Intrinsic::sadd_with_overflow:
    case Intrinsic::umul_with_overflow:
diff --git a/test/Transforms/InstCombine/ctpop.ll b/test/Transforms/InstCombine/ctpop.ll

index d49a907..e8e3603 100644 (file)
--- a/test/Transforms/InstCombine/ctpop.ll
+++ b/test/Transforms/InstCombine/ctpop.ll
@@ -3,6 +3,8 @@
  
  declare i32 @llvm.ctpop.i32(i32)
  declare i8 @llvm.ctpop.i8(i8)
+declare i1 @llvm.ctpop.i1(i1)
+declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
  declare void @llvm.assume(i1)
  
  define i1 @test1(i32 %arg) {
@@ -44,7 +46,7 @@ define i1 @test3(i32 %arg) {
  ; Negative test for when we know nothing
  define i1 @test4(i8 %arg) {
  ; CHECK-LABEL: @test4(
-; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[ARG:%.*]])
+; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[ARG:%.*]]), !range ![[RANGE:[0-9]+]]
  ; CHECK-NEXT:    [[RES:%.*]] = icmp eq i8 [[CNT]], 2
  ; CHECK-NEXT:    ret i1 [[RES]]
  ;
@@ -55,16 +57,41 @@ define i1 @test4(i8 %arg) {
  
  ; Test when the number of possible known bits isn't one less than a power of 2
  ; and the compare value is greater but less than the next power of 2.
-; TODO: The icmp is unnecessary given the known bits of the input.
  define i1 @test5(i32 %arg) {
  ; CHECK-LABEL: @test5(
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[ARG:%.*]], 3
-; CHECK-NEXT:    [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[AND]])
-; CHECK-NEXT:    [[RES:%.*]] = icmp eq i32 [[CNT]], 3
-; CHECK-NEXT:    ret i1 [[RES]]
+; CHECK-NEXT:    ret i1 false
  ;
    %and = and i32 %arg, 3
    %cnt = call i32 @llvm.ctpop.i32(i32 %and)
    %res = icmp eq i32 %cnt, 3
    ret i1 %res
  }
+
+; Test when the number of possible known bits isn't one less than a power of 2
+; and the compare value is greater but less than the next power of 2.
+; TODO: The icmp is unnecessary given the known bits of the input, but range
+; metadata doesn't support vectors
+define <2 x i1> @test5vec(<2 x i32> %arg) {
+; CHECK-LABEL: @test5vec(
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[CNT:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[AND]])
+; CHECK-NEXT:    [[RES:%.*]] = icmp eq <2 x i32> [[CNT]], <i32 3, i32 3>
+; CHECK-NEXT:    ret <2 x i1> [[RES]]
+;
+  %and = and <2 x i32> %arg, <i32 3, i32 3>
+  %cnt = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %and)
+  %res = icmp eq <2 x i32> %cnt, <i32 3, i32 3>
+  ret <2 x i1> %res
+}
+
+; Make sure we don't add range metadata to i1 ctpop.
+define i1 @test6(i1 %arg) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    [[CNT:%.*]] = call i1 @llvm.ctpop.i1(i1 [[ARG:%.*]])
+; CHECK-NEXT:    ret i1 [[CNT]]
+;
+  %cnt = call i1 @llvm.ctpop.i1(i1 %arg)
+  ret i1 %cnt
+}
+
+; CHECK: ![[RANGE]] = !{i8 0, i8 9}
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll

index 1b1ed60..c294d79 100644 (file)
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -16,6 +16,8 @@ declare %ov.result.32 @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
  declare double @llvm.powi.f64(double, i32) nounwind readonly
  declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
  declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+declare i1 @llvm.cttz.i1(i1, i1) nounwind readnone
+declare i1 @llvm.ctlz.i1(i1, i1) nounwind readnone
  declare i32 @llvm.ctpop.i32(i32) nounwind readnone
  declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone
  declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
@@ -293,6 +295,16 @@ define <2 x i32> @cttz_vec(<2 x i32> %a) {
    ret <2 x i32> %count
  }
  
+; Make sure we don't add range metadata to i1 cttz.
+define i1 @cttz_i1(i1 %arg) {
+; CHECK-LABEL: @cttz_i1(
+; CHECK-NEXT:    [[CNT:%.*]] = call i1 @llvm.cttz.i1(i1 [[ARG:%.*]], i1 false) #2
+; CHECK-NEXT:    ret i1 [[CNT]]
+;
+  %cnt = call i1 @llvm.cttz.i1(i1 %arg, i1 false) nounwind readnone
+  ret i1 %cnt
+}
+
  define i1 @cttz_knownbits(i32 %arg) {
  ; CHECK-LABEL: @cttz_knownbits(
  ; CHECK-NEXT:    ret i1 false
@@ -316,7 +328,7 @@ define <2 x i1> @cttz_knownbits_vec(<2 x i32> %arg) {
  define i1 @cttz_knownbits2(i32 %arg) {
  ; CHECK-LABEL: @cttz_knownbits2(
  ; CHECK-NEXT:    [[OR:%.*]] = or i32 [[ARG:%.*]], 4
-; CHECK-NEXT:    [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true)
+; CHECK-NEXT:    [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true) #2, !range ![[CTTZ_RANGE:[0-9]+]]
  ; CHECK-NEXT:    [[RES:%.*]] = icmp eq i32 [[CNT]], 2
  ; CHECK-NEXT:    ret i1 [[RES]]
  ;
@@ -339,13 +351,9 @@ define <2 x i1> @cttz_knownbits2_vec(<2 x i32> %arg) {
    ret <2 x i1> %res
  }
  
-; TODO: The icmp is unnecessary given the known bits of the input.
  define i1 @cttz_knownbits3(i32 %arg) {
  ; CHECK-LABEL: @cttz_knownbits3(
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[ARG:%.*]], 4
-; CHECK-NEXT:    [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true) #2
-; CHECK-NEXT:    [[RES:%.*]] = icmp eq i32 [[CNT]], 3
-; CHECK-NEXT:    ret i1 [[RES]]
+; CHECK-NEXT:    ret i1 false
  ;
    %or = or i32 %arg, 4
    %cnt = call i32 @llvm.cttz.i32(i32 %or, i1 true) nounwind readnone
@@ -387,6 +395,16 @@ define <2 x i8> @ctlz_vec(<2 x i8> %a) {
    ret <2 x i8> %count
  }
  
+; Make sure we don't add range metadata to i1 ctlz.
+define i1 @ctlz_i1(i1 %arg) {
+; CHECK-LABEL: @ctlz_i1(
+; CHECK-NEXT:    [[CNT:%.*]] = call i1 @llvm.ctlz.i1(i1 [[ARG:%.*]], i1 false) #2
+; CHECK-NEXT:    ret i1 [[CNT]]
+;
+  %cnt = call i1 @llvm.ctlz.i1(i1 %arg, i1 false) nounwind readnone
+  ret i1 %cnt
+}
+
  define i1 @ctlz_knownbits(i8 %arg) {
  ; CHECK-LABEL: @ctlz_knownbits(
  ; CHECK-NEXT:    ret i1 false
@@ -410,7 +428,7 @@ define <2 x i1> @ctlz_knownbits_vec(<2 x i8> %arg) {
  define i1 @ctlz_knownbits2(i8 %arg) {
  ; CHECK-LABEL: @ctlz_knownbits2(
  ; CHECK-NEXT:    [[OR:%.*]] = or i8 [[ARG:%.*]], 32
-; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true)
+; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true) #2, !range ![[CTLZ_RANGE:[0-9]+]]
  ; CHECK-NEXT:    [[RES:%.*]] = icmp eq i8 [[CNT]], 2
  ; CHECK-NEXT:    ret i1 [[RES]]
  ;
@@ -433,13 +451,9 @@ define <2 x i1> @ctlz_knownbits2_vec(<2 x i8> %arg) {
    ret <2 x i1> %res
  }
  
-; TODO: The icmp is unnecessary given the known bits of the input.
  define i1 @ctlz_knownbits3(i8 %arg) {
  ; CHECK-LABEL: @ctlz_knownbits3(
-; CHECK-NEXT:    [[OR:%.*]] = or i8 [[ARG:%.*]], 32
-; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true) #2
-; CHECK-NEXT:    [[RES:%.*]] = icmp eq i8 [[CNT]], 3
-; CHECK-NEXT:    ret i1 [[RES]]
+; CHECK-NEXT:    ret i1 false
  ;
    %or = or i8 %arg, 32
    %cnt = call i8 @llvm.ctlz.i8(i8 %or, i1 true) nounwind readnone
@@ -790,3 +804,6 @@ define void @nearbyint(double *%P) {
    store volatile double %C, double* %P
    ret void
  }
+
+; CHECK: [[CTTZ_RANGE]] = !{i32 0, i32 3}
+; CHECK: [[CTLZ_RANGE]] = !{i8 0, i8 3}
author	Craig Topper <craig.topper@intel.com>
	Wed, 21 Jun 2017 16:32:35 +0000 (16:32 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Wed, 21 Jun 2017 16:32:35 +0000 (16:32 +0000)
lib/Transforms/InstCombine/InstCombineCalls.cpp		patch \| blob \| history
test/Transforms/InstCombine/ctpop.ll		patch \| blob \| history
test/Transforms/InstCombine/intrinsics.ll		patch \| blob \| history