Srl_imm)) {
assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
+ // Mask off the unnecessary bits of the AND immediate; normally
+ // DAGCombine will do this, but that might not happen if
+ // targetShrinkDemandedConstant chooses a different immediate.
+ And_imm &= -1U >> Srl_imm;
+
// Note: The width operand is encoded as width-1.
unsigned Width = countTrailingOnes(And_imm) - 1;
unsigned LSB = Srl_imm;
}
}
+/// Try to replace the constant RHS of an i32 AND with a cheaper immediate
+/// that is equivalent on the bits in \p DemandedAPInt.  Preference order
+/// reflects ARM/Thumb encodings: a uxtb mask (0xFF), a uxth mask (0xFFFF),
+/// a small positive mask (Thumb1 movs+ands, legal ARM/Thumb2 immediate),
+/// then a small negative mask (Thumb1 movs+bics).  Returns true when the
+/// constant was replaced (or is already the preferred one); false defers
+/// to the generic ShrinkDemandedConstant handling.
+bool
+ARMTargetLowering::targetShrinkDemandedConstant(SDValue Op,
+                                                const APInt &DemandedAPInt,
+                                                TargetLoweringOpt &TLO) const {
+  // Delay optimization, so we don't have to deal with illegal types, or block
+  // optimizations.
+  if (!TLO.LegalOps)
+    return false;
+
+  // Only optimize AND for now.
+  if (Op.getOpcode() != ISD::AND)
+    return false;
+
+  EVT VT = Op.getValueType();
+
+  // Ignore vectors.
+  if (VT.isVector())
+    return false;
+
+  assert(VT == MVT::i32 && "Unexpected integer type");
+
+  // Make sure the RHS really is a constant.
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+  if (!C)
+    return false;
+
+  unsigned Mask = C->getZExtValue();
+
+  // If mask is zero, nothing to do.
+  if (!Mask)
+    return false;
+
+  // ShrunkMask: the smallest candidate (only demanded bits kept).
+  // ExpandedMask: the largest candidate (non-demanded bits all set).
+  // Any mask between the two is equivalent on the demanded bits.
+  unsigned Demanded = DemandedAPInt.getZExtValue();
+  unsigned ShrunkMask = Mask & Demanded;
+  unsigned ExpandedMask = Mask | ~Demanded;
+
+  // A candidate is legal iff it contains every bit of ShrunkMask and sets no
+  // bit outside ExpandedMask, i.e. ShrunkMask <= candidate <= ExpandedMask
+  // bitwise.
+  auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
+    return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
+  };
+  // Install NewMask as the AND immediate.  Returning true without rebuilding
+  // the node means the existing constant is already the one we prefer.
+  auto UseMask = [this, Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
+    if (NewMask == Mask)
+      return true;
+    SDLoc DL(Op);
+    SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
+    SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
+    return TLO.CombineTo(Op, NewOp);
+  };
+
+  // Prefer uxtb mask.
+  if (IsLegalMask(0xFF))
+    return UseMask(0xFF);
+
+  // Prefer uxth mask.
+  if (IsLegalMask(0xFFFF))
+    return UseMask(0xFFFF);
+
+  // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
+  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
+  if (ShrunkMask < 256)
+    return UseMask(ShrunkMask);
+
+  // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
+  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
+  if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
+    return UseMask(ExpandedMask);
+
+  // Potential improvements:
+  //
+  // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
+  // We could try to prefer Thumb1 immediates which can be lowered to a
+  // two-instruction sequence.
+  // We could try to recognize more legal ARM/Thumb2 immediates here.
+
+  return false;
+}
+
+
//===----------------------------------------------------------------------===//
// ARM Inline Assembly Support
//===----------------------------------------------------------------------===//
const SelectionDAG &DAG,
unsigned Depth) const override;
+ bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
+ TargetLoweringOpt &TLO) const override;
+
bool ExpandInlineAsm(CallInst *CI) const override;
; because we do not have the kill flag on R0.
; CHECK: mov.w [[R1:lr]], #7
; CHECK: add.w [[R0:r[0-9]+]], [[R1]], [[R0]], lsl #2
-; CHECK: bic [[R0]], [[R0]], #7
+; CHECK: bic [[R0]], [[R0]], #4
; CHECK: lsrs r4, [[R0]], #2
; CHECK: bl __chkstk
; CHECK: sub.w sp, sp, r4
}
; CHECK-SMALL-CODE: adds [[R4:r[0-9]+]], #7
-; CHECK-SMALL-CODE: bic [[R4]], [[R4]], #7
+; CHECK-SMALL-CODE: bic [[R4]], [[R4]], #4
; CHECK-SMALL-CODE: lsrs r4, [[R4]], #2
; CHECK-SMALL-CODE: bl __chkstk
; CHECK-SMALL-CODE: sub.w sp, sp, r4
; CHECK-LARGE-CODE: adds [[R4:r[0-9]+]], #7
-; CHECK-LARGE-CODE: bic [[R4]], [[R4]], #7
+; CHECK-LARGE-CODE: bic [[R4]], [[R4]], #4
; CHECK-LARGE-CODE: lsrs r4, [[R4]], #2
; CHECK-LARGE-CODE: movw [[IP:r[0-9]+]], :lower16:__chkstk
; CHECK-LARGE-CODE: movt [[IP]], :upper16:__chkstk
}
; CHECK-LABEL: i16_cmpz:
-; T1: movs r2, #127
-; T1-NEXT: lsls r2, r2, #9
-; T1-NEXT: ands r2, r0
-; T1-NEXT: lsrs r0, r2, #9
-; T2: and r0, r0, #65024
+; T1: uxth r0, r0
+; T1-NEXT: lsrs r0, r0, #9
+; T1-NEXT: bne
+; T2: uxth r0, r0
; T2-NEXT: movs r2, #0
; T2-NEXT: cmp.w r2, r0, lsr #9
define void @i16_cmpz(i16 %x, void (i32)* %foo) {
; LE-LABEL: i24_and_or:
; LE: @ %bb.0:
; LE-NEXT: ldrh r1, [r0]
-; LE-NEXT: mov r2, #16256
-; LE-NEXT: orr r2, r2, #49152
; LE-NEXT: orr r1, r1, #384
-; LE-NEXT: and r1, r1, r2
+; LE-NEXT: bic r1, r1, #127
; LE-NEXT: strh r1, [r0]
; LE-NEXT: mov pc, lr
;
; CHECK-NEXT: mov r1, #1
; CHECK-NEXT: tst r0, #1
; CHECK-NEXT: orr r1, r1, #65536
-; CHECK-NEXT: mov r0, r1
-; CHECK-NEXT: moveq r0, #23
-; CHECK-NEXT: and r0, r0, r1
+; CHECK-NEXT: moveq r1, #23
+; CHECK-NEXT: bic r0, r1, #22
; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: mov pc, lr
%sel = select i1 %cond, i64 65537, i64 23
define void @truncated(i16 %a, i16* %p) {
; CHECK-T1-LABEL: truncated:
; CHECK-T1: @ %bb.0:
-; CHECK-T1-NEXT: ldr r2, .LCPI2_0
+; CHECK-T1-NEXT: movs r2, #128
+; CHECK-T1-NEXT: bics r0, r2
+; CHECK-T1-NEXT: strh r0, [r1]
+; CHECK-T1-NEXT: bx lr
+;
+; CHECK-T2-LABEL: truncated:
+; CHECK-T2: @ %bb.0:
+; CHECK-T2-NEXT: bic r0, r0, #128
+; CHECK-T2-NEXT: strh r0, [r1]
+; CHECK-T2-NEXT: bx lr
+ %and = and i16 %a, -129
+ store i16 %and, i16* %p
+ ret void
+}
+
+; AND of a truncated i16 with -2: the expanded mask falls in [-256, -2], so
+; the new targetShrinkDemandedConstant hook should pick the movs+bics (T1) /
+; bic #imm (T2) form instead of materializing a 16-bit mask constant.
+define void @truncated_neg2(i16 %a, i16* %p) {
+; CHECK-T1-LABEL: truncated_neg2:
+; CHECK-T1:       @ %bb.0:
+; CHECK-T1-NEXT:    movs r2, #1
+; CHECK-T1-NEXT:    bics r0, r2
+; CHECK-T1-NEXT:    strh r0, [r1]
+; CHECK-T1-NEXT:    bx lr
+;
+; CHECK-T2-LABEL: truncated_neg2:
+; CHECK-T2:       @ %bb.0:
+; CHECK-T2-NEXT:    bic r0, r0, #1
+; CHECK-T2-NEXT:    strh r0, [r1]
+; CHECK-T2-NEXT:    bx lr
+  %and = and i16 %a, -2
+  store i16 %and, i16* %p
+  ret void
+}
+
+; AND with -256 exercises the lower boundary of the movs+bics range
+; (ExpandedMask == -256) in targetShrinkDemandedConstant.
+define void @truncated_neg256(i16 %a, i16* %p) {
+; CHECK-T1-LABEL: truncated_neg256:
+; CHECK-T1:       @ %bb.0:
+; CHECK-T1-NEXT:    movs r2, #255
+; CHECK-T1-NEXT:    bics r0, r2
+; CHECK-T1-NEXT:    strh r0, [r1]
+; CHECK-T1-NEXT:    bx lr
+;
+; CHECK-T2-LABEL: truncated_neg256:
+; CHECK-T2:       @ %bb.0:
+; CHECK-T2-NEXT:    bic r0, r0, #255
+; CHECK-T2-NEXT:    strh r0, [r1]
+; CHECK-T2-NEXT:    bx lr
+  %and = and i16 %a, -256
+  store i16 %and, i16* %p
+  ret void
+}
+
+; FIXME: Thumb2 supports "bic r0, r0, #510"
+define void @truncated_neg511(i16 %a, i16* %p) {
+; CHECK-T1-LABEL: truncated_neg511:
+; CHECK-T1: @ %bb.0:
+; CHECK-T1-NEXT: ldr r2, .LCPI5_0
; CHECK-T1-NEXT: ands r2, r0
; CHECK-T1-NEXT: strh r2, [r1]
; CHECK-T1-NEXT: bx lr
; CHECK-T1-NEXT: .p2align 2
; CHECK-T1-NEXT: @ %bb.1:
-; CHECK-T1-NEXT: .LCPI2_0:
-; CHECK-T1-NEXT: .long 65407 @ 0xff7f
+; CHECK-T1-NEXT: .LCPI5_0:
+; CHECK-T1-NEXT: .long 65025 @ 0xfe01
;
-; CHECK-T2-LABEL: truncated:
+; CHECK-T2-LABEL: truncated_neg511:
; CHECK-T2: @ %bb.0:
-; CHECK-T2-NEXT: movw r2, #65407
+; CHECK-T2-NEXT: movw r2, #65025
; CHECK-T2-NEXT: ands r0, r2
; CHECK-T2-NEXT: strh r0, [r1]
; CHECK-T2-NEXT: bx lr
- %and = and i16 %a, -129
+ %and = and i16 %a, -511
store i16 %and, i16* %p
ret void
}
; CHECK-LABEL: test4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: lsls r0, r0, #4
-; CHECK-NEXT: movs r1, #127
+; CHECK-NEXT: movs r1, #112
; CHECK-NEXT: bics r0, r1
; CHECK-NEXT: bx lr
entry:
define i32 @test9(i32 %x) {
; CHECK-LABEL: test9:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: lsrs r1, r0, #2
-; CHECK-NEXT: ldr r0, .LCPI8_0
-; CHECK-NEXT: ands r0, r1
+; CHECK-NEXT: lsrs r0, r0, #2
+; CHECK-NEXT: movs r1, #1
+; CHECK-NEXT: bics r0, r1
; CHECK-NEXT: bx lr
-; CHECK-NEXT: .p2align 2
-; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .LCPI8_0:
-; CHECK-NEXT: .long 1073741822 @ 0x3ffffffe
entry:
%and = lshr i32 %x, 2
%shr = and i32 %and, 1073741822
ret i32 %shr
}
+
+; shl followed by AND 255: 0xFF is a legal mask for the demanded bits, so the
+; hook's first preference (uxtb) should be selected rather than an ands.
+define i32 @test10(i32 %x) {
+; CHECK-LABEL: test10:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    lsls r0, r0, #2
+; CHECK-NEXT:    uxtb r0, r0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = shl i32 %x, 2
+  %shr = and i32 %0, 255
+  ret i32 %shr
+}