[InstCombine] narrow truncated add/sub/mul with constant

author Sanjay Patel <spatel@rotateright.com>

Fri, 4 Aug 2017 22:30:34 +0000 (22:30 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Fri, 4 Aug 2017 22:30:34 +0000 (22:30 +0000)
author Sanjay Patel <spatel@rotateright.com>
Fri, 4 Aug 2017 22:30:34 +0000 (22:30 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Fri, 4 Aug 2017 22:30:34 +0000 (22:30 +0000)
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp

index e212ced..566a143 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -558,6 +558,33 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
      return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
    }
  
+  if ((!isa<IntegerType>(SrcTy) || shouldChangeType(SrcTy, DestTy)) &&
+      Src->hasOneUse()) {
+    // Add/sub/mul can always be narrowed if we're killing the high bits.
+    // If one operand is a constant, then we're not generating more
+    // instructions to perform the narrower math op.
+    Value *X;
+    Constant *C;
+    if (match(Src, m_Add(m_Value(X), m_Constant(C)))) {
+      // trunc(add X, C) --> add(trunc X, C')
+      Value *TruncX = Builder.CreateTrunc(X, DestTy);
+      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
+      return BinaryOperator::CreateAdd(TruncX, NarrowC);
+    }
+    if (match(Src, m_Mul(m_Value(X), m_Constant(C)))) {
+      // trunc(mul X, C) --> mul(trunc X, C')
+      Value *TruncX = Builder.CreateTrunc(X, DestTy);
+      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
+      return BinaryOperator::CreateMul(TruncX, NarrowC);
+    }
+    if (match(Src, m_Sub(m_Constant(C), m_Value(X)))) {
+      // trunc(sub C, X) --> sub(C', trunc X)
+      Value *TruncX = Builder.CreateTrunc(X, DestTy);
+      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
+      return BinaryOperator::CreateSub(NarrowC, TruncX);
+    }
+  }
+
    // FIXME: Maybe combine the next two transforms to handle the no cast case
    // more efficiently. Support vector types. Cleanup code by using m_OneUse.
  
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll

index 1d3e223..d121f62 100644 (file)
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -1126,12 +1126,12 @@ define %s @test78(%s *%p, i64 %i, i64 %j, i32 %k, i32 %l, i128 %m, i128 %n) {
  
  define %s @test79(%s *%p, i64 %i, i32 %j) {
  ; CHECK-LABEL: @test79(
-; CHECK-NEXT:    [[A:%.*]] = mul nsw i64 %i, 36
-; CHECK-NEXT:    [[B:%.*]] = trunc i64 [[A]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 %i to i32
+; CHECK-NEXT:    [[B:%.*]] = mul i32 [[TMP1]], 36
  ; CHECK-NEXT:    [[C:%.*]] = mul i32 [[B]], %j
  ; CHECK-NEXT:    [[Q:%.*]] = bitcast %s* %p to i8*
-; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[C]] to i64
-; CHECK-NEXT:    [[PP:%.*]] = getelementptr inbounds i8, i8* [[Q]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[C]] to i64
+; CHECK-NEXT:    [[PP:%.*]] = getelementptr inbounds i8, i8* [[Q]], i64 [[TMP2]]
  ; CHECK-NEXT:    [[R:%.*]] = bitcast i8* [[PP]] to %s*
  ; CHECK-NEXT:    [[L:%.*]] = load %s, %s* [[R]], align 4
  ; CHECK-NEXT:    ret %s [[L]]
@@ -1239,8 +1239,8 @@ define i64 @test82(i64 %A) nounwind {
  define i64 @test83(i16 %a, i64 %k) {
  ; CHECK-LABEL: @test83(
  ; CHECK-NEXT:    [[CONV:%.*]] = sext i16 %a to i32
-; CHECK-NEXT:    [[SUB:%.*]] = add i64 %k, 4294967295
-; CHECK-NEXT:    [[SH_PROM:%.*]] = trunc i64 [[SUB]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 %k to i32
+; CHECK-NEXT:    [[SH_PROM:%.*]] = add i32 [[TMP1]], -1
  ; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[CONV]], [[SH_PROM]]
  ; CHECK-NEXT:    [[SH_PROM1:%.*]] = zext i32 [[SHL]] to i64
  ; CHECK-NEXT:    ret i64 [[SH_PROM1]]
diff --git a/test/Transforms/InstCombine/ffs-1.ll b/test/Transforms/InstCombine/ffs-1.ll

index 5dcdae1..5be47ef 100644 (file)
--- a/test/Transforms/InstCombine/ffs-1.ll
+++ b/test/Transforms/InstCombine/ffs-1.ll
@@ -148,7 +148,7 @@ define i32 @test_simplify12() {
  
  define i32 @test_simplify13(i32 %x) {
  ; ALL-LABEL: @test_simplify13(
-; ALL-NEXT:    [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; ALL-NEXT:    [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 %x, i1 true), !range !0
  ; ALL-NEXT:    [[TMP1:%.*]] = add nuw nsw i32 [[CTTZ]], 1
  ; ALL-NEXT:    [[TMP2:%.*]] = icmp eq i32 %x, 0
  ; ALL-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]]
@@ -164,7 +164,7 @@ define i32 @test_simplify14(i32 %x) {
  ; GENERIC-NEXT:    ret i32 [[RET]]
  ;
  ; TARGET-LABEL: @test_simplify14(
-; TARGET-NEXT:    [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; TARGET-NEXT:    [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 %x, i1 true), !range !0
  ; TARGET-NEXT:    [[TMP1:%.*]] = add nuw nsw i32 [[CTTZ]], 1
  ; TARGET-NEXT:    [[TMP2:%.*]] = icmp eq i32 %x, 0
  ; TARGET-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]]
@@ -180,9 +180,9 @@ define i32 @test_simplify15(i64 %x) {
  ; GENERIC-NEXT:    ret i32 [[RET]]
  ;
  ; TARGET-LABEL: @test_simplify15(
-; TARGET-NEXT:    [[CTTZ:%.*]] = call i64 @llvm.cttz.i64(i64 %x, i1 true)
-; TARGET-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[CTTZ]], 1
-; TARGET-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; TARGET-NEXT:    [[CTTZ:%.*]] = call i64 @llvm.cttz.i64(i64 %x, i1 true), !range !1
+; TARGET-NEXT:    [[TMP1:%.*]] = trunc i64 [[CTTZ]] to i32
+; TARGET-NEXT:    [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
  ; TARGET-NEXT:    [[TMP3:%.*]] = icmp eq i64 %x, 0
  ; TARGET-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
  ; TARGET-NEXT:    ret i32 [[TMP4]]
diff --git a/test/Transforms/InstCombine/fls.ll b/test/Transforms/InstCombine/fls.ll

index 75a3520..bf87e6a 100644 (file)
--- a/test/Transforms/InstCombine/fls.ll
+++ b/test/Transforms/InstCombine/fls.ll
@@ -31,8 +31,8 @@ define i32 @myflsll() {
  define i32 @flsnotconst(i64 %z) {
  ; CHECK-LABEL: @flsnotconst(
  ; CHECK-NEXT:    [[CTLZ:%.*]] = call i64 @llvm.ctlz.i64(i64 %z, i1 false), !range !0
-; CHECK-NEXT:    [[TMP1:%.*]] = sub nsw i64 64, [[CTLZ]]
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[CTLZ]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw i32 64, [[TMP1]]
  ; CHECK-NEXT:    ret i32 [[TMP2]]
  ;
    %goo = call i32 @flsl(i64 %z)
diff --git a/test/Transforms/InstCombine/trunc.ll b/test/Transforms/InstCombine/trunc.ll

index d606815..6d0e0ac 100644 (file)
--- a/test/Transforms/InstCombine/trunc.ll
+++ b/test/Transforms/InstCombine/trunc.ll
@@ -535,8 +535,8 @@ define <8 x i8> @wide_lengthening_splat(<4 x i16> %v) {
  
  define <2 x i8> @narrow_add_vec_constant(<2 x i32> %x) {
  ; CHECK-LABEL: @narrow_add_vec_constant(
-; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i32> %x, <i32 256, i32 -129>
-; CHECK-NEXT:    [[TR:%.*]] = trunc <2 x i32> [[ADD]] to <2 x i8>
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i32> %x to <2 x i8>
+; CHECK-NEXT:    [[TR:%.*]] = add <2 x i8> [[TMP1]], <i8 0, i8 127>
  ; CHECK-NEXT:    ret <2 x i8> [[TR]]
  ;
    %add = add <2 x i32> %x, <i32 256, i32 -129>
@@ -544,10 +544,21 @@ define <2 x i8> @narrow_add_vec_constant(<2 x i32> %x) {
    ret <2 x i8> %tr
  }
  
+define <2 x i8> @narrow_mul_vec_constant(<2 x i32> %x) {
+; CHECK-LABEL: @narrow_mul_vec_constant(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i32> %x to <2 x i8>
+; CHECK-NEXT:    [[TR:%.*]] = mul <2 x i8> [[TMP1]], <i8 0, i8 127>
+; CHECK-NEXT:    ret <2 x i8> [[TR]]
+;
+  %add = mul <2 x i32> %x, <i32 256, i32 -129>
+  %tr = trunc <2 x i32> %add to <2 x i8>
+  ret <2 x i8> %tr
+}
+
  define <2 x i8> @narrow_sub_vec_constant(<2 x i32> %x) {
  ; CHECK-LABEL: @narrow_sub_vec_constant(
-; CHECK-NEXT:    [[SUB:%.*]] = sub <2 x i32> <i32 256, i32 -129>, %x
-; CHECK-NEXT:    [[TR:%.*]] = trunc <2 x i32> [[SUB]] to <2 x i8>
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i32> %x to <2 x i8>
+; CHECK-NEXT:    [[TR:%.*]] = sub <2 x i8> <i8 0, i8 127>, [[TMP1]]
  ; CHECK-NEXT:    ret <2 x i8> [[TR]]
  ;
    %sub = sub <2 x i32> <i32 256, i32 -129>, %x
diff --git a/test/Transforms/InstCombine/vec_phi_extract.ll b/test/Transforms/InstCombine/vec_phi_extract.ll

index 9beb98c..ea610b0 100644 (file)
--- a/test/Transforms/InstCombine/vec_phi_extract.ll
+++ b/test/Transforms/InstCombine/vec_phi_extract.ll
@@ -56,7 +56,7 @@ ret:
  define void @nocopy(i64 %val, i32  %limit, i32 *%ptr) {
  ; CHECK-LABEL: @nocopy
  ; CHECK-NOT: phi i32
-; CHECK: phi <16 x i32> [ %1, %entry ], [ %inc, %loop ]
+; CHECK: phi <16 x i32> [ %3, %entry ], [ %inc, %loop ]
  entry:
    %tempvector = insertelement <16 x i64> undef, i64 %val, i32 0
    %vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer
author	Sanjay Patel <spatel@rotateright.com>
	Fri, 4 Aug 2017 22:30:34 +0000 (22:30 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Fri, 4 Aug 2017 22:30:34 +0000 (22:30 +0000)
lib/Transforms/InstCombine/InstCombineCasts.cpp		patch | blob | history
test/Transforms/InstCombine/cast.ll		patch | blob | history
test/Transforms/InstCombine/ffs-1.ll		patch | blob | history
test/Transforms/InstCombine/fls.ll		patch | blob | history
test/Transforms/InstCombine/trunc.ll		patch | blob | history
test/Transforms/InstCombine/vec_phi_extract.ll		patch | blob | history