Name: narrow_sub
%sub = sub i32 C1, %x
%r = trunc i32 %sub to i8
=>
%xn = trunc i32 %x to i8
%narrowC = trunc i32 C1 to i8
%r = sub i8 %narrowC, %xn
Name: narrow_add
%add = add i32 %x, C1
%r = trunc i32 %add to i8
=>
%xn = trunc i32 %x to i8
%narrowC = trunc i32 C1 to i8
%r = add i8 %xn, %narrowC
Name: narrow_mul
%mul = mul i32 %x, C1
%r = trunc i32 %mul to i8
=>
%xn = trunc i32 %x to i8
%narrowC = trunc i32 C1 to i8
%r = mul i8 %xn, %narrowC
http://rise4fun.com/Alive/QpS
This doesn't solve PR34046 (failure to recognize a rotate):
https://bugs.llvm.org/show_bug.cgi?id=34046
...but it removes an extra complication from the examples in that bug's
description, reducing them to a form that we can match more easily.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310141 91177308-0d34-0410-b5e6-96231b3b80d8
return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
}
+ if ((!isa<IntegerType>(SrcTy) || shouldChangeType(SrcTy, DestTy)) &&
+ Src->hasOneUse()) {
+ // Add/sub/mul can always be narrowed if we're killing the high bits.
+ // If one operand is a constant, then we're not generating more
+ // instructions to perform the narrower math op.
+ Value *X;
+ Constant *C;
+ if (match(Src, m_Add(m_Value(X), m_Constant(C)))) {
+ // trunc(add X, C) --> add(trunc X, C')
+ Value *TruncX = Builder.CreateTrunc(X, DestTy);
+ Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
+ return BinaryOperator::CreateAdd(TruncX, NarrowC);
+ }
+ if (match(Src, m_Mul(m_Value(X), m_Constant(C)))) {
+ // trunc(mul X, C) --> mul(trunc X, C')
+ Value *TruncX = Builder.CreateTrunc(X, DestTy);
+ Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
+ return BinaryOperator::CreateMul(TruncX, NarrowC);
+ }
+ if (match(Src, m_Sub(m_Constant(C), m_Value(X)))) {
+ // trunc(sub C, X) --> sub(C', trunc X)
+ Value *TruncX = Builder.CreateTrunc(X, DestTy);
+ Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
+ return BinaryOperator::CreateSub(NarrowC, TruncX);
+ }
+ }
+
// FIXME: Maybe combine the next two transforms to handle the no cast case
// more efficiently. Support vector types. Cleanup code by using m_OneUse.
define %s @test79(%s *%p, i64 %i, i32 %j) {
; CHECK-LABEL: @test79(
-; CHECK-NEXT: [[A:%.*]] = mul nsw i64 %i, 36
-; CHECK-NEXT: [[B:%.*]] = trunc i64 [[A]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 %i to i32
+; CHECK-NEXT: [[B:%.*]] = mul i32 [[TMP1]], 36
; CHECK-NEXT: [[C:%.*]] = mul i32 [[B]], %j
; CHECK-NEXT: [[Q:%.*]] = bitcast %s* %p to i8*
-; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[C]] to i64
-; CHECK-NEXT: [[PP:%.*]] = getelementptr inbounds i8, i8* [[Q]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[C]] to i64
+; CHECK-NEXT: [[PP:%.*]] = getelementptr inbounds i8, i8* [[Q]], i64 [[TMP2]]
; CHECK-NEXT: [[R:%.*]] = bitcast i8* [[PP]] to %s*
; CHECK-NEXT: [[L:%.*]] = load %s, %s* [[R]], align 4
; CHECK-NEXT: ret %s [[L]]
define i64 @test83(i16 %a, i64 %k) {
; CHECK-LABEL: @test83(
; CHECK-NEXT: [[CONV:%.*]] = sext i16 %a to i32
-; CHECK-NEXT: [[SUB:%.*]] = add i64 %k, 4294967295
-; CHECK-NEXT: [[SH_PROM:%.*]] = trunc i64 [[SUB]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 %k to i32
+; CHECK-NEXT: [[SH_PROM:%.*]] = add i32 [[TMP1]], -1
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[CONV]], [[SH_PROM]]
; CHECK-NEXT: [[SH_PROM1:%.*]] = zext i32 [[SHL]] to i64
; CHECK-NEXT: ret i64 [[SH_PROM1]]
define i32 @test_simplify13(i32 %x) {
; ALL-LABEL: @test_simplify13(
-; ALL-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; ALL-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 %x, i1 true), !range !0
; ALL-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[CTTZ]], 1
; ALL-NEXT: [[TMP2:%.*]] = icmp eq i32 %x, 0
; ALL-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]]
; GENERIC-NEXT: ret i32 [[RET]]
;
; TARGET-LABEL: @test_simplify14(
-; TARGET-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; TARGET-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 %x, i1 true), !range !0
; TARGET-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[CTTZ]], 1
; TARGET-NEXT: [[TMP2:%.*]] = icmp eq i32 %x, 0
; TARGET-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]]
; GENERIC-NEXT: ret i32 [[RET]]
;
; TARGET-LABEL: @test_simplify15(
-; TARGET-NEXT: [[CTTZ:%.*]] = call i64 @llvm.cttz.i64(i64 %x, i1 true)
-; TARGET-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[CTTZ]], 1
-; TARGET-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; TARGET-NEXT: [[CTTZ:%.*]] = call i64 @llvm.cttz.i64(i64 %x, i1 true), !range !1
+; TARGET-NEXT: [[TMP1:%.*]] = trunc i64 [[CTTZ]] to i32
+; TARGET-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
; TARGET-NEXT: [[TMP3:%.*]] = icmp eq i64 %x, 0
; TARGET-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
; TARGET-NEXT: ret i32 [[TMP4]]
define i32 @flsnotconst(i64 %z) {
; CHECK-LABEL: @flsnotconst(
; CHECK-NEXT: [[CTLZ:%.*]] = call i64 @llvm.ctlz.i64(i64 %z, i1 false), !range !0
-; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i64 64, [[CTLZ]]
-; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[CTLZ]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i32 64, [[TMP1]]
; CHECK-NEXT: ret i32 [[TMP2]]
;
%goo = call i32 @flsl(i64 %z)
define <2 x i8> @narrow_add_vec_constant(<2 x i32> %x) {
; CHECK-LABEL: @narrow_add_vec_constant(
-; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> %x, <i32 256, i32 -129>
-; CHECK-NEXT: [[TR:%.*]] = trunc <2 x i32> [[ADD]] to <2 x i8>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> %x to <2 x i8>
+; CHECK-NEXT: [[TR:%.*]] = add <2 x i8> [[TMP1]], <i8 0, i8 127>
; CHECK-NEXT: ret <2 x i8> [[TR]]
;
%add = add <2 x i32> %x, <i32 256, i32 -129>
ret <2 x i8> %tr
}
+; trunc(mul X, C) with a vector constant: the multiply is performed in the
+; narrow type, with each constant element truncated to i8.
+define <2 x i8> @narrow_mul_vec_constant(<2 x i32> %x) {
+; CHECK-LABEL: @narrow_mul_vec_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> %x to <2 x i8>
+; CHECK-NEXT: [[TR:%.*]] = mul <2 x i8> [[TMP1]], <i8 0, i8 127>
+; CHECK-NEXT: ret <2 x i8> [[TR]]
+;
+ %mul = mul <2 x i32> %x, <i32 256, i32 -129>
+ %tr = trunc <2 x i32> %mul to <2 x i8>
+ ret <2 x i8> %tr
+}
+
define <2 x i8> @narrow_sub_vec_constant(<2 x i32> %x) {
; CHECK-LABEL: @narrow_sub_vec_constant(
-; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> <i32 256, i32 -129>, %x
-; CHECK-NEXT: [[TR:%.*]] = trunc <2 x i32> [[SUB]] to <2 x i8>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> %x to <2 x i8>
+; CHECK-NEXT: [[TR:%.*]] = sub <2 x i8> <i8 0, i8 127>, [[TMP1]]
; CHECK-NEXT: ret <2 x i8> [[TR]]
;
%sub = sub <2 x i32> <i32 256, i32 -129>, %x
define void @nocopy(i64 %val, i32 %limit, i32 *%ptr) {
; CHECK-LABEL: @nocopy
; CHECK-NOT: phi i32
-; CHECK: phi <16 x i32> [ %1, %entry ], [ %inc, %loop ]
+; CHECK: phi <16 x i32> [ %3, %entry ], [ %inc, %loop ]
entry:
%tempvector = insertelement <16 x i64> undef, i64 %val, i32 0
%vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer