From 7627d9c2297151bebe5b6ae06252d2434c8b445a Mon Sep 17 00:00:00 2001
From: David Majnemer
Date: Fri, 26 Dec 2014 09:50:35 +0000
Subject: [PATCH] InstCombine: Infer nuw for multiplies

A multiply cannot unsigned wrap if there are bitwidth, or more, leading
zero bits between the two operands.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224849 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/InstCombine/InstCombine.h   |  1 +
 .../InstCombine/InstCombineMulDivRem.cpp   | 38 ++++++++++++++++++++++
 test/Transforms/InstCombine/intrinsics.ll  |  2 +-
 test/Transforms/InstCombine/mul.ll         | 22 ++++++++++---
 4 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index d6eb6d42d57..b4d1efc1a92 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -286,6 +286,7 @@ private:
   bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
   bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
   bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction *CxtI);
+  bool WillNotOverflowUnsignedMul(Value *LHS, Value *RHS, Instruction *CxtI);
   Value *EmitGEPOffset(User *GEP);
   Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
   Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 5beaf00c16e..d444d33ca8a 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -165,6 +165,39 @@ bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
   return false;
 }
 
+/// \brief Return true if we can prove that:
+///    (mul LHS, RHS) === (mul nuw LHS, RHS)
+bool InstCombiner::WillNotOverflowUnsignedMul(Value *LHS, Value *RHS,
+                                              Instruction *CxtI) {
+  // Multiplying n * m significant bits yields a result of n + m significant
+  // bits. If the total number of significant bits does not exceed the
+  // result bit width (minus 1), there is no overflow.
+  // This means if we have enough leading zero bits in the operands
+  // we can guarantee that the result does not overflow.
+  // Ref: "Hacker's Delight" by Henry Warren
+  unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
+  APInt LHSKnownZero(BitWidth, 0);
+  APInt RHSKnownZero(BitWidth, 0);
+  APInt TmpKnownOne(BitWidth, 0);
+  computeKnownBits(LHS, LHSKnownZero, TmpKnownOne, 0, CxtI);
+  computeKnownBits(RHS, RHSKnownZero, TmpKnownOne, 0, CxtI);
+  // Note that underestimating the number of zero bits gives a more
+  // conservative answer.
+  unsigned ZeroBits = LHSKnownZero.countLeadingOnes() +
+                      RHSKnownZero.countLeadingOnes();
+  // First handle the easy case: if we have enough zero bits there's
+  // definitely no overflow.
+  if (ZeroBits >= BitWidth)
+    return true;
+
+  // There is an ambiguous case where there can be no overflow:
+  //   ZeroBits == BitWidth - 1
+  // However, determining overflow requires calculating the sign bit of
+  // LHS * RHS/2.
+
+  return false;
+}
+
 Instruction *InstCombiner::visitMul(BinaryOperator &I) {
   bool Changed = SimplifyAssociativeOrCommutative(I);
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -380,6 +413,11 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
     I.setHasNoSignedWrap(true);
   }
 
+  if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedMul(Op0, Op1, &I)) {
+    Changed = true;
+    I.setHasNoUnsignedWrap(true);
+  }
+
   return Changed ? &I : nullptr;
 }
 
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index c745ea8a59b..eae14de6a31 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -148,7 +148,7 @@ define %ov.result.32 @smultest1_nsw(i32 %a, i32 %b) {
   %x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
   ret %ov.result.32 %x
 ; CHECK-LABEL: @smultest1_nsw
-; CHECK: %x = mul nsw i32 %A, %B
+; CHECK: %x = mul nuw nsw i32 %A, %B
 ; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
 ; CHECK-NEXT: ret %ov.result.32 %1
 }
diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll
index d19338a3c8f..a81ba726387 100644
--- a/test/Transforms/InstCombine/mul.ll
+++ b/test/Transforms/InstCombine/mul.ll
@@ -258,12 +258,24 @@ define i32 @test28(i32 %A) {
 
 define i64 @test29(i31 %A, i31 %B) {
 ; CHECK-LABEL: @test29(
-  %C = zext i31 %A to i64
-  %D = zext i31 %B to i64
+  %C = sext i31 %A to i64
+  %D = sext i31 %B to i64
   %E = mul i64 %C, %D
   ret i64 %E
-; CHECK: %[[zext1:.*]] = zext i31 %A to i64
-; CHECK-NEXT: %[[zext2:.*]] = zext i31 %B to i64
-; CHECK-NEXT: %[[mul:.*]] = mul nsw i64 %[[zext1]], %[[zext2]]
+; CHECK: %[[sext1:.*]] = sext i31 %A to i64
+; CHECK-NEXT: %[[sext2:.*]] = sext i31 %B to i64
+; CHECK-NEXT: %[[mul:.*]] = mul nsw i64 %[[sext1]], %[[sext2]]
+; CHECK-NEXT: ret i64 %[[mul]]
+}
+
+define i64 @test30(i32 %A, i32 %B) {
+; CHECK-LABEL: @test30(
+  %C = zext i32 %A to i64
+  %D = zext i32 %B to i64
+  %E = mul i64 %C, %D
+  ret i64 %E
+; CHECK: %[[zext1:.*]] = zext i32 %A to i64
+; CHECK-NEXT: %[[zext2:.*]] = zext i32 %B to i64
+; CHECK-NEXT: %[[mul:.*]] = mul nuw i64 %[[zext1]], %[[zext2]]
 ; CHECK-NEXT: ret i64 %[[mul]]
 }
-- 
2.11.0
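
For a concrete feel for the leading-zero argument the new WillNotOverflowUnsignedMul comment cites from Hacker's Delight, the standalone C++ sketch below (not part of the patch; clz32 and cannotUnsignedWrapMul are invented names for illustration) checks the same ZeroBits >= BitWidth condition on 32-bit values: a value with n significant bits times a value with m significant bits needs at most n + m bits, so if the operands' leading-zero counts sum to at least the bit width, the unsigned product cannot wrap.

    // Standalone illustration of the ZeroBits >= BitWidth test for 32-bit values.
    // Not LLVM code; clz32 and cannotUnsignedWrapMul are hypothetical helpers.
    #include <cassert>
    #include <cstdint>

    static unsigned clz32(uint32_t X) {
      unsigned N = 0;
      for (uint32_t Mask = 0x80000000u; Mask && !(X & Mask); Mask >>= 1)
        ++N;
      return N; // 32 when X == 0
    }

    // Mirrors the easy case in WillNotOverflowUnsignedMul: enough leading zero
    // bits between the two operands guarantees the product fits in 32 bits.
    static bool cannotUnsignedWrapMul(uint32_t A, uint32_t B) {
      return clz32(A) + clz32(B) >= 32;
    }

    int main() {
      uint32_t A = 0x0000FFFFu, B = 0x0000FFFFu;        // 16 + 16 = 32 leading zeros
      assert(cannotUnsignedWrapMul(A, B));
      assert(uint64_t(A) * uint64_t(B) <= UINT32_MAX);  // 0xFFFE0001, no wrap
      return 0;
    }

The ambiguous ZeroBits == BitWidth - 1 case that the patch deliberately leaves unhandled is exactly the situation this simple count cannot decide without examining the product itself.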