}
}
+ // Add range metadata since known bits can't completely reflect what we know.
+ // TODO: Handle splat vectors.
+ auto *IT = dyn_cast<IntegerType>(Op0->getType());
+ if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
+ Metadata *LowAndHigh[] = {
+ ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
+ ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
+ II.setMetadata(LLVMContext::MD_range,
+ MDNode::get(II.getContext(), LowAndHigh));
+ return &II;
+ }
+
+ return nullptr;
+}
+
+static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {
+ assert(II.getIntrinsicID() == Intrinsic::ctpop &&
+ "Expected ctpop intrinsic");
+ Value *Op0 = II.getArgOperand(0);
+ // FIXME: Try to simplify vectors of integers.
+ auto *IT = dyn_cast<IntegerType>(Op0->getType());
+ if (!IT)
+ return nullptr;
+
+ unsigned BitWidth = IT->getBitWidth();
+ KnownBits Known(BitWidth);
+ IC.computeKnownBits(Op0, Known, 0, &II);
+
+ unsigned MinCount = Known.countMinPopulation();
+ unsigned MaxCount = Known.countMaxPopulation();
+
+ // Add range metadata since known bits can't completely reflect what we know.
+ if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
+ Metadata *LowAndHigh[] = {
+ ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
+ ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
+ II.setMetadata(LLVMContext::MD_range,
+ MDNode::get(II.getContext(), LowAndHigh));
+ return &II;
+ }
+
return nullptr;
}
return I;
break;
+ case Intrinsic::ctpop:
+ if (auto *I = foldCtpop(*II, *this))
+ return I;
+ break;
+
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
case Intrinsic::umul_with_overflow:
declare i32 @llvm.ctpop.i32(i32)
declare i8 @llvm.ctpop.i8(i8)
+declare i1 @llvm.ctpop.i1(i1)
+declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
declare void @llvm.assume(i1)
define i1 @test1(i32 %arg) {
; Negative test for when we know nothing
define i1 @test4(i8 %arg) {
; CHECK-LABEL: @test4(
-; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[ARG:%.*]])
+; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[ARG:%.*]]), !range ![[RANGE:[0-9]+]]
; CHECK-NEXT: [[RES:%.*]] = icmp eq i8 [[CNT]], 2
; CHECK-NEXT: ret i1 [[RES]]
;
; Test when the number of possible known bits isn't one less than a power of 2
; and the compare value is greater but less than the next power of 2.
-; TODO: The icmp is unnecessary given the known bits of the input.
define i1 @test5(i32 %arg) {
; CHECK-LABEL: @test5(
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[ARG:%.*]], 3
-; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[AND]])
-; CHECK-NEXT: [[RES:%.*]] = icmp eq i32 [[CNT]], 3
-; CHECK-NEXT: ret i1 [[RES]]
+; CHECK-NEXT: ret i1 false
;
%and = and i32 %arg, 3
%cnt = call i32 @llvm.ctpop.i32(i32 %and)
%res = icmp eq i32 %cnt, 3
ret i1 %res
}
+
+; Test when the number of possible known bits isn't one less than a power of 2
+; and the compare value is greater but less than the next power of 2.
+; TODO: The icmp is unnecessary given the known bits of the input, but range
+; metadata doesn't support vectors
+define <2 x i1> @test5vec(<2 x i32> %arg) {
+; CHECK-LABEL: @test5vec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 3, i32 3>
+; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[AND]])
+; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i32> [[CNT]], <i32 3, i32 3>
+; CHECK-NEXT: ret <2 x i1> [[RES]]
+;
+ %and = and <2 x i32> %arg, <i32 3, i32 3>
+ %cnt = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %and)
+ %res = icmp eq <2 x i32> %cnt, <i32 3, i32 3>
+ ret <2 x i1> %res
+}
+
+; Make sure we don't add range metadata to i1 ctpop.
+define i1 @test6(i1 %arg) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[CNT:%.*]] = call i1 @llvm.ctpop.i1(i1 [[ARG:%.*]])
+; CHECK-NEXT: ret i1 [[CNT]]
+;
+ %cnt = call i1 @llvm.ctpop.i1(i1 %arg)
+ ret i1 %cnt
+}
+
+; CHECK: ![[RANGE]] = !{i8 0, i8 9}
declare double @llvm.powi.f64(double, i32) nounwind readonly
declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+declare i1 @llvm.cttz.i1(i1, i1) nounwind readnone
+declare i1 @llvm.ctlz.i1(i1, i1) nounwind readnone
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone
declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
ret <2 x i32> %count
}
+; Make sure we don't add range metadata to i1 cttz.
+define i1 @cttz_i1(i1 %arg) {
+; CHECK-LABEL: @cttz_i1(
+; CHECK-NEXT: [[CNT:%.*]] = call i1 @llvm.cttz.i1(i1 [[ARG:%.*]], i1 false) #2
+; CHECK-NEXT: ret i1 [[CNT]]
+;
+ %cnt = call i1 @llvm.cttz.i1(i1 %arg, i1 false) nounwind readnone
+ ret i1 %cnt
+}
+
define i1 @cttz_knownbits(i32 %arg) {
; CHECK-LABEL: @cttz_knownbits(
; CHECK-NEXT: ret i1 false
define i1 @cttz_knownbits2(i32 %arg) {
; CHECK-LABEL: @cttz_knownbits2(
; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG:%.*]], 4
-; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true)
+; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true) #2, !range ![[CTTZ_RANGE:[0-9]+]]
; CHECK-NEXT: [[RES:%.*]] = icmp eq i32 [[CNT]], 2
; CHECK-NEXT: ret i1 [[RES]]
;
ret <2 x i1> %res
}
-; TODO: The icmp is unnecessary given the known bits of the input.
define i1 @cttz_knownbits3(i32 %arg) {
; CHECK-LABEL: @cttz_knownbits3(
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG:%.*]], 4
-; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true) #2
-; CHECK-NEXT: [[RES:%.*]] = icmp eq i32 [[CNT]], 3
-; CHECK-NEXT: ret i1 [[RES]]
+; CHECK-NEXT: ret i1 false
;
%or = or i32 %arg, 4
%cnt = call i32 @llvm.cttz.i32(i32 %or, i1 true) nounwind readnone
ret <2 x i8> %count
}
+; Make sure we don't add range metadata to i1 ctlz.
+define i1 @ctlz_i1(i1 %arg) {
+; CHECK-LABEL: @ctlz_i1(
+; CHECK-NEXT: [[CNT:%.*]] = call i1 @llvm.ctlz.i1(i1 [[ARG:%.*]], i1 false) #2
+; CHECK-NEXT: ret i1 [[CNT]]
+;
+ %cnt = call i1 @llvm.ctlz.i1(i1 %arg, i1 false) nounwind readnone
+ ret i1 %cnt
+}
+
define i1 @ctlz_knownbits(i8 %arg) {
; CHECK-LABEL: @ctlz_knownbits(
; CHECK-NEXT: ret i1 false
define i1 @ctlz_knownbits2(i8 %arg) {
; CHECK-LABEL: @ctlz_knownbits2(
; CHECK-NEXT: [[OR:%.*]] = or i8 [[ARG:%.*]], 32
-; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true)
+; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true) #2, !range ![[CTLZ_RANGE:[0-9]+]]
; CHECK-NEXT: [[RES:%.*]] = icmp eq i8 [[CNT]], 2
; CHECK-NEXT: ret i1 [[RES]]
;
ret <2 x i1> %res
}
-; TODO: The icmp is unnecessary given the known bits of the input.
define i1 @ctlz_knownbits3(i8 %arg) {
; CHECK-LABEL: @ctlz_knownbits3(
-; CHECK-NEXT: [[OR:%.*]] = or i8 [[ARG:%.*]], 32
-; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true) #2
-; CHECK-NEXT: [[RES:%.*]] = icmp eq i8 [[CNT]], 3
-; CHECK-NEXT: ret i1 [[RES]]
+; CHECK-NEXT: ret i1 false
;
%or = or i8 %arg, 32
%cnt = call i8 @llvm.ctlz.i8(i8 %or, i1 true) nounwind readnone
store volatile double %C, double* %P
ret void
}
+
+; CHECK: [[CTTZ_RANGE]] = !{i32 0, i32 3}
+; CHECK: [[CTLZ_RANGE]] = !{i8 0, i8 3}