MaxRecurse))
return V;
+ // Assuming the effective width of Y is not larger than A, i.e. all bits
+ // from X and Y are disjoint in (X << A) | Y,
+ // if the mask of this AND op covers all bits of X or Y, while it covers
+ // no bits from the other, we can bypass this AND op. E.g.,
+ // ((X << A) | Y) & Mask -> Y,
+ // if Mask = ((1 << effective_width_of(Y)) - 1)
+ // ((X << A) | Y) & Mask -> X << A,
+ // if Mask = ((1 << effective_width_of(X)) - 1) << A
+ // SimplifyDemandedBits in InstCombine can optimize the general case.
+ // This pattern aims to help other passes for a common case.
+ Value *Y, *XShifted;
+ if (match(Op1, m_APInt(Mask)) &&
+ match(Op0, m_c_Or(m_CombineAnd(m_NUWShl(m_Value(X), m_APInt(ShAmt)),
+ m_Value(XShifted)),
+ m_Value(Y)))) {
+ const unsigned ShftCnt = ShAmt->getZExtValue();
+ const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ const unsigned Width = Op0->getType()->getScalarSizeInBits();
+ const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros();
+ if (EffWidthY <= ShftCnt) {
+ const KnownBits XKnown = computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI,
+ Q.DT);
+ const unsigned EffWidthX = Width - XKnown.countMinLeadingZeros();
+ const APInt EffBitsY = APInt::getLowBitsSet(Width, EffWidthY);
+ const APInt EffBitsX = APInt::getLowBitsSet(Width, EffWidthX) << ShftCnt;
+ // If the mask is extracting all bits from X or Y as is, we can skip
+ // this AND op.
+ if (EffBitsY.isSubsetOf(*Mask) && !EffBitsX.intersects(*Mask))
+ return Y;
+ if (EffBitsX.isSubsetOf(*Mask) && !EffBitsY.intersects(*Mask))
+ return XShifted;
+ }
+ }
+
return nullptr;
}
define i64 @shl_or_and1(i32 %a, i1 %b) {
; CHECK-LABEL: @shl_or_and1(
-; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[B:%.*]] to i64
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32
-; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 1
-; CHECK-NEXT: ret i64 [[TMP5]]
+; CHECK-NEXT: ret i64 [[TMP2]]
;
%tmp1 = zext i32 %a to i64
%tmp2 = zext i1 %b to i64
define i64 @shl_or_and2(i32 %a, i1 %b) {
; CHECK-LABEL: @shl_or_and2(
; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[B:%.*]] to i64
-; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32
-; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4294967296
-; CHECK-NEXT: ret i64 [[TMP5]]
+; CHECK-NEXT: ret i64 [[TMP3]]
;
%tmp1 = zext i1 %b to i64
%tmp2 = zext i32 %a to i64
ret i64 %tmp5
}
-; concatinate two 32-bit integers and extract lower 32-bit
+; concatenate two 32-bit integers and extract lower 32-bit
define i64 @shl_or_and3(i32 %a, i32 %b) {
; CHECK-LABEL: @shl_or_and3(
-; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[B:%.*]] to i64
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32
-; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4294967295
-; CHECK-NEXT: ret i64 [[TMP5]]
+; CHECK-NEXT: ret i64 [[TMP2]]
;
%tmp1 = zext i32 %a to i64
%tmp2 = zext i32 %b to i64
ret i64 %tmp5
}
-; concatinate two 16-bit integers and extract higher 16-bit
+; concatenate two 16-bit integers and extract higher 16-bit
define i32 @shl_or_and4(i16 %a, i16 %b) {
; CHECK-LABEL: @shl_or_and4(
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP1]], 16
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], -65536
-; CHECK-NEXT: ret i32 [[TMP5]]
+; CHECK-NEXT: ret i32 [[TMP3]]
;
%tmp1 = zext i16 %a to i32
%tmp2 = zext i16 %b to i32
define i128 @shl_or_and5(i64 %a, i1 %b) {
; CHECK-LABEL: @shl_or_and5(
-; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[A:%.*]] to i128
; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[B:%.*]] to i128
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i128 [[TMP1]], 64
-; CHECK-NEXT: [[TMP4:%.*]] = or i128 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = and i128 [[TMP4]], 1
-; CHECK-NEXT: ret i128 [[TMP5]]
+; CHECK-NEXT: ret i128 [[TMP2]]
;
%tmp1 = zext i64 %a to i128
%tmp2 = zext i1 %b to i128
define <2 x i64> @shl_or_and1v(<2 x i32> %a, <2 x i1> %b) {
; CHECK-LABEL: @shl_or_and1v(
-; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i64>
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP1]], <i64 32, i64 32>
-; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP3]], [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP4]], <i64 1, i64 1>
-; CHECK-NEXT: ret <2 x i64> [[TMP5]]
+; CHECK-NEXT: ret <2 x i64> [[TMP2]]
;
%tmp1 = zext <2 x i32> %a to <2 x i64>
%tmp2 = zext <2 x i1> %b to <2 x i64>
define <2 x i64> @shl_or_and2v(<2 x i32> %a, <2 x i1> %b) {
; CHECK-LABEL: @shl_or_and2v(
; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i64>
-; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP1]], <i64 32, i64 32>
-; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP4]], <i64 4294967296, i64 4294967296>
-; CHECK-NEXT: ret <2 x i64> [[TMP5]]
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
;
%tmp1 = zext <2 x i1> %b to <2 x i64>
%tmp2 = zext <2 x i32> %a to <2 x i64>