From 1704efdd5fdf8c544304e043a3845e35c398fffa Mon Sep 17 00:00:00 2001 From: Shawn Landden Date: Sun, 26 May 2019 13:55:52 +0000 Subject: [PATCH] [SimplifyCFG] ReduceSwitchRange: Improve on the case where the SubThreshold doesn't trigger git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361728 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 38 +++++++++++++++++++----------- test/Transforms/SimplifyCFG/rangereduce.ll | 8 +++---- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index e5925545aca..524c3708e7b 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5552,18 +5552,6 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, BestDistance = Values[I] - Values[I - 1]; } } - uint64_t Base = 0; - // Now transform the values such that they start at zero and ascend. - if (Values[BestIndex] >= SubThreshold) { - Base = Values[BestIndex]; - MadeChanges = true; - for (auto &V : Values) - V = (APInt(BitWidth, V) - Base).getLimitedValue(); - } - - // Now we have signed numbers that have been shifted so that, given enough - // precision, there are no negative values. Since the rest of the transform - // is bitwise only, we switch now to an unsigned representation. // This transform can be done speculatively because it is so cheap - it // results in a single rotate operation being inserted. @@ -5575,8 +5563,10 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, // one element and LLVM disallows duplicate cases, Shift is guaranteed to be // less than 64. unsigned Shift = 64; + // We need to store this from _before_ the transform + uint64_t BestIndexXor = Values[BestIndex]; for (auto &V : Values) - Shift = std::min(Shift, countTrailingZeros(V)); + Shift = std::min(Shift, countTrailingZeros(V ^ BestIndexXor)); assert(Shift < 64); if (Shift > 0) { MadeChanges = true; @@ -5584,6 +5574,26 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, V >>= Shift; } + // We Xor against Values[] (any element will do) because the if we do not + // start at zero, but also don't meet the SubThreshold, then we still might + // share common rights bits, and if this transform succeeds + // then we should insert the subtraction anyways, because the rotate trick + // below to avoid a branch needs the shifted away bits to be zero. + + // Now transform the values such that they start at zero and ascend. Do not + // do this if the shift reduces the lowest value to less than SubThreshold, + // or if the subtraction is less than SubThreshold and it does not enable a + // rotate. + uint64_t Base = 0; + if ((BestIndexXor >= SubThreshold && Shift == 0) || + (Shift > countTrailingZeros(BestIndexXor) && + Values[BestIndex] >= SubThreshold)) { + Base = BestIndexXor; + MadeChanges = true; + for (auto &V : Values) + V = (APInt(BitWidth, V) - Base).getLimitedValue(); + } + if (!MadeChanges) // We didn't do anything. return false; @@ -5614,7 +5624,7 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, for (auto Case : SI->cases()) { auto *Orig = Case.getCaseValue(); - auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base); + auto Sub = Orig->getValue() - Base; Case.setValue(cast(ConstantInt::get(Ty, Sub.lshr(Shift)))); } return true; diff --git a/test/Transforms/SimplifyCFG/rangereduce.ll b/test/Transforms/SimplifyCFG/rangereduce.ll index e9be26e94e4..804882fe761 100644 --- a/test/Transforms/SimplifyCFG/rangereduce.ll +++ b/test/Transforms/SimplifyCFG/rangereduce.ll @@ -234,10 +234,10 @@ three: define i8 @test7(i8 %a) optsize { ; CHECK-LABEL: @test7( -; CHECK-NEXT: [[TMP1:%.*]] = sub i8 [[A:%.*]], -36 -; CHECK-NEXT: [[TMP2:%.*]] = lshr i8 [[TMP1]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = shl i8 [[TMP1]], 6 -; CHECK-NEXT: [[TMP4:%.*]] = or i8 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 [[A:%.*]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[A]], 6 +; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = sub i8 [[TMP3]], 55 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP4]], 4 ; CHECK-NEXT: br i1 [[TMP5]], label [[SWITCH_LOOKUP:%.*]], label [[DEF:%.*]] ; CHECK: switch.lookup: -- 2.11.0