}
}
+ // With no signed wrap, the sign bit of (X - Y) is set exactly when X s< Y:
+ // lshr i32 (X -nsw Y), 31 --> zext (X s< Y)
+ Value *Y;
+ if (ShAmt == BitWidth - 1 &&
+ match(Op0, m_OneUse(m_NSWSub(m_Value(X), m_Value(Y)))))
+ return new ZExtInst(Builder.CreateICmpSLT(X, Y), Ty);
+
if (match(Op0, m_LShr(m_Value(X), m_APInt(ShOp1)))) {
unsigned AmtSum = ShAmt + ShOp1->getZExtValue();
// Oversized shifts are simplified to zero in InstSimplify.
return new SExtInst(NewSh, Ty);
}
+ // With no signed wrap, the sign bit of (X - Y) is set exactly when X s< Y:
+ // ashr i32 (X -nsw Y), 31 --> sext (X s< Y)
+ Value *Y;
+ if (ShAmt == BitWidth - 1 &&
+ match(Op0, m_OneUse(m_NSWSub(m_Value(X), m_Value(Y)))))
+ return new SExtInst(Builder.CreateICmpSLT(X, Y), Ty);
+
// If the shifted-out value is known-zero, then this is an exact shift.
if (!I.isExact() &&
MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmt), 0, &I)) {
define i32 @lshr_sub_nsw(i32 %x, i32 %y) {
; CHECK-LABEL: @lshr_sub_nsw(
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[SUB]], 31
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SHR:%.*]] = zext i1 [[TMP1]] to i32
; CHECK-NEXT: ret i32 [[SHR]]
;
%sub = sub nsw i32 %x, %y
ret i32 %shr
}
+; negative test - shift amount must be bitwidth-1 so that only the sign bit remains
+
define i32 @lshr_sub_wrong_amount(i32 %x, i32 %y) {
; CHECK-LABEL: @lshr_sub_wrong_amount(
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]]
ret i32 %shr
}
+; negative test - must have nsw
+
define i32 @lshr_sub(i32 %x, i32 %y) {
; CHECK-LABEL: @lshr_sub(
; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[X:%.*]], [[Y:%.*]]
ret i32 %shr
}
+; negative test - the sub must have no other uses (one-use check)
+
define i32 @lshr_sub_nsw_extra_use(i32 %x, i32 %y, i32* %p) {
; CHECK-LABEL: @lshr_sub_nsw_extra_use(
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]]
define <3 x i42> @lshr_sub_nsw_splat(<3 x i42> %x, <3 x i42> %y) {
; CHECK-LABEL: @lshr_sub_nsw_splat(
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw <3 x i42> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr <3 x i42> [[SUB]], <i42 41, i42 41, i42 41>
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <3 x i42> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SHR:%.*]] = zext <3 x i1> [[TMP1]] to <3 x i42>
; CHECK-NEXT: ret <3 x i42> [[SHR]]
;
%sub = sub nsw <3 x i42> %x, %y
define i17 @ashr_sub_nsw(i17 %x, i17 %y) {
; CHECK-LABEL: @ashr_sub_nsw(
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i17 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[SHR:%.*]] = ashr i17 [[SUB]], 16
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i17 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SHR:%.*]] = sext i1 [[TMP1]] to i17
; CHECK-NEXT: ret i17 [[SHR]]
;
%sub = sub nsw i17 %x, %y
ret i17 %shr
}
+; negative test - shift amount must be bitwidth-1 so that only the sign bit remains
+
define i17 @ashr_sub_wrong_amount(i17 %x, i17 %y) {
; CHECK-LABEL: @ashr_sub_wrong_amount(
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i17 [[X:%.*]], [[Y:%.*]]
ret i17 %shr
}
+; negative test - must have nsw
+
define i32 @ashr_sub(i32 %x, i32 %y) {
; CHECK-LABEL: @ashr_sub(
; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[X:%.*]], [[Y:%.*]]
ret i32 %shr
}
+; negative test - the sub must have no other uses (one-use check)
+
define i32 @ashr_sub_nsw_extra_use(i32 %x, i32 %y, i32* %p) {
; CHECK-LABEL: @ashr_sub_nsw_extra_use(
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]]
define <3 x i43> @ashr_sub_nsw_splat(<3 x i43> %x, <3 x i43> %y) {
; CHECK-LABEL: @ashr_sub_nsw_splat(
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw <3 x i43> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[SHR:%.*]] = ashr <3 x i43> [[SUB]], <i43 42, i43 42, i43 42>
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <3 x i43> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SHR:%.*]] = sext <3 x i1> [[TMP1]] to <3 x i43>
; CHECK-NEXT: ret <3 x i43> [[SHR]]
;
%sub = sub nsw <3 x i43> %x, %y
define i8 @sub_ashr_and_i8(i8 %x, i8 %y) {
; CHECK-LABEL: @sub_ashr_and_i8(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[AND:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 0
; CHECK-NEXT: ret i8 [[AND]]
;
define i16 @sub_ashr_and_i16(i16 %x, i16 %y) {
; CHECK-LABEL: @sub_ashr_and_i16(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i16 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i16 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[AND:%.*]] = select i1 [[TMP1]], i16 [[X]], i16 0
; CHECK-NEXT: ret i16 [[AND]]
;
define i32 @sub_ashr_and_i32(i32 %x, i32 %y) {
; CHECK-LABEL: @sub_ashr_and_i32(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[AND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0
; CHECK-NEXT: ret i32 [[AND]]
;
define i64 @sub_ashr_and_i64(i64 %x, i64 %y) {
; CHECK-LABEL: @sub_ashr_and_i64(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[AND:%.*]] = select i1 [[TMP1]], i64 [[X]], i64 0
; CHECK-NEXT: ret i64 [[AND]]
;
define i32 @sub_ashr_and_i32_nuw_nsw(i32 %x, i32 %y) {
; CHECK-LABEL: @sub_ashr_and_i32_nuw_nsw(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[AND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0
; CHECK-NEXT: ret i32 [[AND]]
;
define i32 @sub_ashr_and_i32_commute(i32 %x, i32 %y) {
; CHECK-LABEL: @sub_ashr_and_i32_commute(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[AND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0
; CHECK-NEXT: ret i32 [[AND]]
;
define <4 x i32> @sub_ashr_and_i32_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @sub_ashr_and_i32_vec(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[AND:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x i32> [[AND]]
;
define <4 x i32> @sub_ashr_and_i32_vec_nuw_nsw(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @sub_ashr_and_i32_vec_nuw_nsw(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[AND:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x i32> [[AND]]
;
define <4 x i32> @sub_ashr_and_i32_vec_commute(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @sub_ashr_and_i32_vec_commute(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[AND:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x i32> [[AND]]
;
define i32 @sub_ashr_and_i32_extra_use_and(i32 %x, i32 %y, i32* %p) {
; CHECK-LABEL: @sub_ashr_and_i32_extra_use_and(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[AND:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0
; CHECK-NEXT: store i32 [[AND]], i32* [[P:%.*]], align 4
; CHECK-NEXT: ret i32 [[AND]]
define i32 @sub_ashr_and_i32_extra_use_ashr(i32 %x, i32 %y, i32* %p) {
; CHECK-LABEL: @sub_ashr_and_i32_extra_use_ashr(
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[SUB]], 31
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[SHR:%.*]] = sext i1 [[TMP1]] to i32
; CHECK-NEXT: store i32 [[SHR]], i32* [[P:%.*]], align 4
; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHR]], [[X]]
; CHECK-NEXT: ret i32 [[AND]]
define i8 @sub_ashr_or_i8(i8 %x, i8 %y) {
; CHECK-LABEL: @sub_ashr_or_i8(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[OR:%.*]] = select i1 [[TMP1]], i8 -1, i8 [[X]]
; CHECK-NEXT: ret i8 [[OR]]
;
define i16 @sub_ashr_or_i16(i16 %x, i16 %y) {
; CHECK-LABEL: @sub_ashr_or_i16(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i16 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i16 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[OR:%.*]] = select i1 [[TMP1]], i16 -1, i16 [[X]]
; CHECK-NEXT: ret i16 [[OR]]
;
define i32 @sub_ashr_or_i32(i32 %x, i32 %y) {
; CHECK-LABEL: @sub_ashr_or_i32(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
; CHECK-NEXT: ret i32 [[OR]]
;
define i64 @sub_ashr_or_i64(i64 %x, i64 %y) {
; CHECK-LABEL: @sub_ashr_or_i64(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[OR:%.*]] = select i1 [[TMP1]], i64 -1, i64 [[X]]
; CHECK-NEXT: ret i64 [[OR]]
;
define i32 @sub_ashr_or_i32_nuw_nsw(i32 %x, i32 %y) {
; CHECK-LABEL: @sub_ashr_or_i32_nuw_nsw(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
; CHECK-NEXT: ret i32 [[OR]]
;
define i32 @sub_ashr_or_i32_commute(i32 %x, i32 %y) {
; CHECK-LABEL: @sub_ashr_or_i32_commute(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
; CHECK-NEXT: ret i32 [[OR]]
;
define <4 x i32> @sub_ashr_or_i32_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @sub_ashr_or_i32_vec(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[X]]
; CHECK-NEXT: ret <4 x i32> [[OR]]
;
define <4 x i32> @sub_ashr_or_i32_vec_nuw_nsw(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @sub_ashr_or_i32_vec_nuw_nsw(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[X]]
; CHECK-NEXT: ret <4 x i32> [[OR]]
;
define <4 x i32> @sub_ashr_or_i32_vec_commute(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @sub_ashr_or_i32_vec_commute(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[X]]
; CHECK-NEXT: ret <4 x i32> [[OR]]
;
define i32 @sub_ashr_or_i32_extra_use_or(i32 %x, i32 %y, i32* %p) {
; CHECK-LABEL: @sub_ashr_or_i32_extra_use_or(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[OR:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[X]]
; CHECK-NEXT: store i32 [[OR]], i32* [[P:%.*]], align 4
; CHECK-NEXT: ret i32 [[OR]]
define i32 @sub_ashr_or_i32_extra_use_ashr(i32 %x, i32 %y, i32* %p) {
; CHECK-LABEL: @sub_ashr_or_i32_extra_use_ashr(
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[SUB]], 31
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[SHR:%.*]] = sext i1 [[TMP1]] to i32
; CHECK-NEXT: store i32 [[SHR]], i32* [[P:%.*]], align 4
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[X]]
; CHECK-NEXT: ret i32 [[OR]]