From fd14e4374d39167bdb5702504f064c775c9f363d Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Fri, 5 Aug 2016 08:22:29 +0000 Subject: [PATCH] [InstCombine] try to fold (select C, (sext A), B) into logical ops Summary: Turn (select C, (sext A), B) into (sext (select C, A, B')) when A is i1 and B is a compatible constant, also for zext instead of sext. This will then be further folded into logical operations. The transformation would be valid for non-i1 types as well, but other parts of InstCombine prefer to have sext from non-i1 as an operand of select. Motivated by the shader compiler frontend in Mesa for AMDGPU, which emits i32 for boolean operations. With this change, the boolean logic is fully recovered. Reviewers: majnemer, spatel, tstellarAMD Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D22747 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@277801 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineSelect.cpp | 56 ++++++++++++++++ .../InstCombine/InstructionCombining.cpp | 2 +- test/Transforms/InstCombine/select-bitext.ll | 74 +++++++++++++++------- test/Transforms/InstCombine/vector-casts.ll | 5 +- 4 files changed, 109 insertions(+), 28 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 018cdcc24d5..b76241a70a9 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -912,6 +912,37 @@ static Instruction *foldAddSubSelect(SelectInst &SI, return nullptr; } +/// If one of the operands is a sext/zext from i1 and the other is a constant, +/// we may be able to create an i1 select which can be further folded to +/// logical ops. 
+static Instruction *foldSelectExtConst(InstCombiner::BuilderTy &Builder, + SelectInst &SI, Instruction *EI, + const APInt &C, bool isExtTrueVal, + bool isSigned) { + Value *SmallVal = EI->getOperand(0); + Type *SmallType = SmallVal->getType(); + + // TODO Handle larger types as well? Note this requires adjusting + // FoldOpIntoSelect as well. + if (!SmallType->getScalarType()->isIntegerTy(1)) + return nullptr; + + if (C != 0 && (isSigned || C != 1) && + (!isSigned || !C.isAllOnesValue())) + return nullptr; + + Value *SmallConst = ConstantInt::get(SmallType, C.trunc(1)); + Value *TrueVal = isExtTrueVal ? SmallVal : SmallConst; + Value *FalseVal = isExtTrueVal ? SmallConst : SmallVal; + Value *Select = Builder.CreateSelect(SI.getOperand(0), TrueVal, FalseVal, + "fold." + SI.getName()); + + if (isSigned) + return new SExtInst(Select, SI.getType()); + + return new ZExtInst(Select, SI.getType()); +} + Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *CondVal = SI.getCondition(); Value *TrueVal = SI.getTrueValue(); @@ -1098,6 +1129,31 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (Instruction *IV = FoldSelectOpOp(SI, TI, FI)) return IV; + // (select C, (sext X), const) -> (sext (select C, X, const')) and + // variations thereof when extending from i1, as that allows further folding + // into logic ops. When the sext is from a larger type, we prefer to have it + // as an operand. 
+ if (TI && + (TI->getOpcode() == Instruction::ZExt || TI->getOpcode() == Instruction::SExt)) { + bool IsSExt = TI->getOpcode() == Instruction::SExt; + const APInt *C; + if (match(FalseVal, m_APInt(C))) { + if (Instruction *IV = + foldSelectExtConst(*Builder, SI, TI, *C, true, IsSExt)) + return IV; + } + } + if (FI && + (FI->getOpcode() == Instruction::ZExt || FI->getOpcode() == Instruction::SExt)) { + bool IsSExt = FI->getOpcode() == Instruction::SExt; + const APInt *C; + if (match(TrueVal, m_APInt(C))) { + if (Instruction *IV = + foldSelectExtConst(*Builder, SI, FI, *C, false, IsSExt)) + return IV; + } + } + // See if we can fold the select into one of our operands. if (SelType->isIntOrIntVectorTy() || SelType->isFPOrFPVectorTy()) { if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal)) diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index daa22dafc59..7ffe34a3c26 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -790,7 +790,7 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { if (isa<Constant>(TV) || isa<Constant>(FV)) { // Bool selects with constant operands can be folded to logical ops. - if (SI->getType()->isIntegerTy(1)) return nullptr; + if (SI->getType()->getScalarType()->isIntegerTy(1)) return nullptr; // If it's a bitcast involving vectors, make sure it has the same number of // elements on both sides. 
diff --git a/test/Transforms/InstCombine/select-bitext.ll b/test/Transforms/InstCombine/select-bitext.ll index 49069cce854..fc379d28a73 100644 --- a/test/Transforms/InstCombine/select-bitext.ll +++ b/test/Transforms/InstCombine/select-bitext.ll @@ -3,8 +3,8 @@ define i32 @test_sext1(i1 %cca, i1 %ccb) { ; CHECK-LABEL: @test_sext1( -; CHECK-NEXT: [[CCAX:%.*]] = sext i1 %cca to i32 -; CHECK-NEXT: [[R:%.*]] = select i1 %ccb, i32 [[CCAX]], i32 0 +; CHECK-NEXT: [[FOLD_R:%.*]] = and i1 %ccb, %cca +; CHECK-NEXT: [[R:%.*]] = sext i1 [[FOLD_R]] to i32 ; CHECK-NEXT: ret i32 [[R]] ; %ccax = sext i1 %cca to i32 @@ -14,8 +14,8 @@ define i32 @test_sext1(i1 %cca, i1 %ccb) { define i32 @test_sext2(i1 %cca, i1 %ccb) { ; CHECK-LABEL: @test_sext2( -; CHECK-NEXT: [[CCAX:%.*]] = sext i1 %cca to i32 -; CHECK-NEXT: [[R:%.*]] = select i1 %ccb, i32 -1, i32 [[CCAX]] +; CHECK-NEXT: [[FOLD_R:%.*]] = or i1 %ccb, %cca +; CHECK-NEXT: [[R:%.*]] = sext i1 [[FOLD_R]] to i32 ; CHECK-NEXT: ret i32 [[R]] ; %ccax = sext i1 %cca to i32 @@ -25,8 +25,9 @@ define i32 @test_sext2(i1 %cca, i1 %ccb) { define i32 @test_sext3(i1 %cca, i1 %ccb) { ; CHECK-LABEL: @test_sext3( -; CHECK-NEXT: [[CCAX:%.*]] = sext i1 %cca to i32 -; CHECK-NEXT: [[R:%.*]] = select i1 %ccb, i32 0, i32 [[CCAX]] +; CHECK-NEXT: [[NOT_CCB:%.*]] = xor i1 %ccb, true +; CHECK-NEXT: [[FOLD_R:%.*]] = and i1 [[NOT_CCB]], %cca +; CHECK-NEXT: [[R:%.*]] = sext i1 [[FOLD_R]] to i32 ; CHECK-NEXT: ret i32 [[R]] ; %ccax = sext i1 %cca to i32 @@ -36,8 +37,9 @@ define i32 @test_sext3(i1 %cca, i1 %ccb) { define i32 @test_sext4(i1 %cca, i1 %ccb) { ; CHECK-LABEL: @test_sext4( -; CHECK-NEXT: [[CCAX:%.*]] = sext i1 %cca to i32 -; CHECK-NEXT: [[R:%.*]] = select i1 %ccb, i32 [[CCAX]], i32 -1 +; CHECK-NEXT: [[NOT_CCB:%.*]] = xor i1 %ccb, true +; CHECK-NEXT: [[FOLD_R:%.*]] = or i1 [[NOT_CCB]], %cca +; CHECK-NEXT: [[R:%.*]] = sext i1 [[FOLD_R]] to i32 ; CHECK-NEXT: ret i32 [[R]] ; %ccax = sext i1 %cca to i32 @@ -47,8 +49,8 @@ define i32 @test_sext4(i1 %cca, i1 %ccb) 
{ define i32 @test_zext1(i1 %cca, i1 %ccb) { ; CHECK-LABEL: @test_zext1( -; CHECK-NEXT: [[CCAX:%.*]] = zext i1 %cca to i32 -; CHECK-NEXT: [[R:%.*]] = select i1 %ccb, i32 [[CCAX]], i32 0 +; CHECK-NEXT: [[FOLD_R:%.*]] = and i1 %ccb, %cca +; CHECK-NEXT: [[R:%.*]] = zext i1 [[FOLD_R]] to i32 ; CHECK-NEXT: ret i32 [[R]] ; %ccax = zext i1 %cca to i32 @@ -58,8 +60,8 @@ define i32 @test_zext1(i1 %cca, i1 %ccb) { define i32 @test_zext2(i1 %cca, i1 %ccb) { ; CHECK-LABEL: @test_zext2( -; CHECK-NEXT: [[CCAX:%.*]] = zext i1 %cca to i32 -; CHECK-NEXT: [[R:%.*]] = select i1 %ccb, i32 1, i32 [[CCAX]] +; CHECK-NEXT: [[FOLD_R:%.*]] = or i1 %ccb, %cca +; CHECK-NEXT: [[R:%.*]] = zext i1 [[FOLD_R]] to i32 ; CHECK-NEXT: ret i32 [[R]] ; %ccax = zext i1 %cca to i32 @@ -69,8 +71,9 @@ define i32 @test_zext2(i1 %cca, i1 %ccb) { define i32 @test_zext3(i1 %cca, i1 %ccb) { ; CHECK-LABEL: @test_zext3( -; CHECK-NEXT: [[CCAX:%.*]] = zext i1 %cca to i32 -; CHECK-NEXT: [[R:%.*]] = select i1 %ccb, i32 0, i32 [[CCAX]] +; CHECK-NEXT: [[NOT_CCB:%.*]] = xor i1 %ccb, true +; CHECK-NEXT: [[FOLD_R:%.*]] = and i1 [[NOT_CCB]], %cca +; CHECK-NEXT: [[R:%.*]] = zext i1 [[FOLD_R]] to i32 ; CHECK-NEXT: ret i32 [[R]] ; %ccax = zext i1 %cca to i32 @@ -80,8 +83,9 @@ define i32 @test_zext3(i1 %cca, i1 %ccb) { define i32 @test_zext4(i1 %cca, i1 %ccb) { ; CHECK-LABEL: @test_zext4( -; CHECK-NEXT: [[CCAX:%.*]] = zext i1 %cca to i32 -; CHECK-NEXT: [[R:%.*]] = select i1 %ccb, i32 [[CCAX]], i32 1 +; CHECK-NEXT: [[NOT_CCB:%.*]] = xor i1 %ccb, true +; CHECK-NEXT: [[FOLD_R:%.*]] = or i1 [[NOT_CCB]], %cca +; CHECK-NEXT: [[R:%.*]] = zext i1 [[FOLD_R]] to i32 ; CHECK-NEXT: ret i32 [[R]] ; %ccax = zext i1 %cca to i32 @@ -151,10 +155,10 @@ define i32 @test_op_op(i32 %a, i32 %b, i32 %c) { ret i32 %r } -define <2 x i32> @test_vectors1(<2 x i1> %cca, <2 x i1> %ccb) { -; CHECK-LABEL: @test_vectors1( -; CHECK-NEXT: [[CCAX:%.*]] = sext <2 x i1> %cca to <2 x i32> -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> %ccb, <2 x i32> [[CCAX]], <2 x 
i32> zeroinitializer +define <2 x i32> @test_vectors_sext(<2 x i1> %cca, <2 x i1> %ccb) { +; CHECK-LABEL: @test_vectors_sext( +; CHECK-NEXT: [[FOLD_R:%.*]] = and <2 x i1> %ccb, %cca +; CHECK-NEXT: [[R:%.*]] = sext <2 x i1> [[FOLD_R]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[R]] ; %ccax = sext <2 x i1> %cca to <2 x i32> @@ -162,13 +166,35 @@ define <2 x i32> @test_vectors1(<2 x i1> %cca, <2 x i1> %ccb) { ret <2 x i32> %r } -define <2 x i32> @test_vectors2(<2 x i1> %cca, i1 %ccb) { -; CHECK-LABEL: @test_vectors2( -; CHECK-NEXT: [[CCAX:%.*]] = sext <2 x i1> %cca to <2 x i32> -; CHECK-NEXT: [[R:%.*]] = select i1 %ccb, <2 x i32> [[CCAX]], <2 x i32> zeroinitializer +define <2 x i32> @test_vectors_zext(<2 x i1> %cca, <2 x i1> %ccb) { +; CHECK-LABEL: @test_vectors_zext( +; CHECK-NEXT: [[FOLD_R:%.*]] = and <2 x i1> %ccb, %cca +; CHECK-NEXT: [[R:%.*]] = zext <2 x i1> [[FOLD_R]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[R]] +; + %ccax = zext <2 x i1> %cca to <2 x i32> + %r = select <2 x i1> %ccb, <2 x i32> %ccax, <2 x i32> <i32 0, i32 0> + ret <2 x i32> %r +} + +define <2 x i32> @scalar_select_of_vectors_sext(<2 x i1> %cca, i1 %ccb) { +; CHECK-LABEL: @scalar_select_of_vectors_sext( +; CHECK-NEXT: [[FOLD_R:%.*]] = select i1 %ccb, <2 x i1> %cca, <2 x i1> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = sext <2 x i1> [[FOLD_R]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[R]] ; %ccax = sext <2 x i1> %cca to <2 x i32> %r = select i1 %ccb, <2 x i32> %ccax, <2 x i32> <i32 0, i32 0> ret <2 x i32> %r } + +define <2 x i32> @scalar_select_of_vectors_zext(<2 x i1> %cca, i1 %ccb) { +; CHECK-LABEL: @scalar_select_of_vectors_zext( +; CHECK-NEXT: [[FOLD_R:%.*]] = select i1 %ccb, <2 x i1> %cca, <2 x i1> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = zext <2 x i1> [[FOLD_R]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[R]] +; + %ccax = zext <2 x i1> %cca to <2 x i32> + %r = select i1 %ccb, <2 x i32> %ccax, <2 x i32> <i32 0, i32 0> + ret <2 x i32> %r +} diff --git a/test/Transforms/InstCombine/vector-casts.ll 
b/test/Transforms/InstCombine/vector-casts.ll index af18b4cfbdd..a17f6d9a936 100644 --- a/test/Transforms/InstCombine/vector-casts.ll +++ b/test/Transforms/InstCombine/vector-casts.ll @@ -63,9 +63,8 @@ entry: ret <2 x i64> %conv ; CHECK-LABEL: @test5( -; CHECK: sext <4 x i1> %cmp to <4 x i32> -; The sext-and pair is canonicalized to a select. -; CHECK: select <4 x i1> %cmp4, <4 x i32> %sext, <4 x i32> zeroinitializer +; CHECK: %fold.and = and <4 x i1> %cmp4, %cmp +; CHECK: sext <4 x i1> %fold.and to <4 x i32> } -- 2.11.0