From: Jessica Paquette Date: Tue, 8 Dec 2020 17:34:42 +0000 (-0800) Subject: [AArch64][GlobalISel] Fold binops on the true side of G_SELECT X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=cd9a52b99e685e8a77dd85d25c7d1ec8b86b9f55;p=android-x86%2Fexternal-llvm-project.git [AArch64][GlobalISel] Fold binops on the true side of G_SELECT This implements the following folds: ``` G_SELECT cc, (G_SUB 0, %x), %false -> CSNEG %x, %false, inv_cc G_SELECT cc, (G_XOR %x, -1), %false -> CSINV %x, %false, inv_cc ``` This is similar to the folds introduced in 5bc0bd05e6a8d788e08cdf3d154f3a33202aee53. In 5bc0bd05e6a8d788e08cdf3d154f3a33202aee53 I mentioned that we may prefer to do this in AArch64PostLegalizerLowering. I think that it's probably better to do this in the selector. The way we select G_SELECT depends on what register banks end up being assigned to it. If we did this in AArch64PostLegalizerLowering, then we'd end up checking *every* G_SELECT to see if it's worth swapping operands. Doing it in the selector allows us to restrict the optimization to only relevant G_SELECTs. Also fix up some comments in `TryFoldBinOpIntoSelect` which are kind of confusing IMO. Example IR: https://godbolt.org/z/3qPGca Differential Revision: https://reviews.llvm.org/D92860 --- diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 493167e284c..c447f75681e 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -1033,31 +1033,36 @@ AArch64InstructionSelector::emitSelect(Register Dst, Register True, // By default, we'll try and emit a CSEL. unsigned Opc = Is32Bit ? 
AArch64::CSELWr : AArch64::CSELXr; bool Optimized = false; - auto TryFoldBinOpIntoSelect = [&Opc, &False, Is32Bit, &MRI]() { + auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI](Register &Reg, + bool Invert) { // Attempt to fold: // - // sub = G_SUB 0, x - // select = G_SELECT cc, true, sub + // %sub = G_SUB 0, %x + // %select = G_SELECT cc, %reg, %sub // // Into: - // select = CSNEG true, x, cc + // %select = CSNEG %reg, %x, cc Register MatchReg; - if (mi_match(False, MRI, m_Neg(m_Reg(MatchReg)))) { + if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) { Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr; - False = MatchReg; + Reg = MatchReg; + if (Invert) + CC = AArch64CC::getInvertedCondCode(CC); return true; } // Attempt to fold: // - // xor = G_XOR x, -1 - // select = G_SELECT cc, true, xor + // %xor = G_XOR %x, -1 + // %select = G_SELECT cc, %reg, %xor // // Into: - // select = CSINV true, x, cc - if (mi_match(False, MRI, m_Not(m_Reg(MatchReg)))) { + // %select = CSINV %reg, %x, cc + if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) { Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; - False = MatchReg; + Reg = MatchReg; + if (Invert) + CC = AArch64CC::getInvertedCondCode(CC); return true; } @@ -1139,7 +1144,8 @@ AArch64InstructionSelector::emitSelect(Register Dst, Register True, return false; }; - Optimized |= TryFoldBinOpIntoSelect(); + Optimized |= TryFoldBinOpIntoSelect(False, /*Invert = */ false); + Optimized |= TryFoldBinOpIntoSelect(True, /*Invert = */ true); Optimized |= TryOptSelectCst(); auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir index 2d7928f906e..d2eee15c5cb 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir @@ -378,6 +378,36 @@ body: | ... 
--- +name: csneg_inverted_cc +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2 + ; G_SELECT cc, (G_SUB 0, %x), %false -> CSNEG %x, %false, inv_cc + + ; CHECK-LABEL: name: csneg_inverted_cc + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: %reg0:gpr32 = COPY $w0 + ; CHECK: %reg1:gpr32 = COPY $w1 + ; CHECK: %f:gpr32 = COPY $w2 + ; CHECK: [[ANDSWri:%[0-9]+]]:gpr32 = ANDSWri %reg0, 0, implicit-def $nzcv + ; CHECK: %select:gpr32 = CSNEGWr %reg1, %f, 0, implicit $nzcv + ; CHECK: $w0 = COPY %select + ; CHECK: RET_ReallyLR implicit $w0 + %reg0:gpr(s32) = COPY $w0 + %cond:gpr(s1) = G_TRUNC %reg0(s32) + %reg1:gpr(s32) = COPY $w1 + %f:gpr(s32) = COPY $w2 + %zero:gpr(s32) = G_CONSTANT i32 0 + %sub:gpr(s32) = G_SUB %zero(s32), %reg1 + %select:gpr(s32) = G_SELECT %cond(s1), %sub, %f + $w0 = COPY %select(s32) + RET_ReallyLR implicit $w0 + +... +--- name: csneg_s64 legalized: true regBankSelected: true @@ -469,6 +499,36 @@ body: | ... --- +name: csinv_inverted_cc +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2 + ; G_SELECT cc, (G_XOR x, -1), %false -> CSINV %x, %false, inv_cc + + ; CHECK-LABEL: name: csinv_inverted_cc + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: %reg0:gpr32 = COPY $w0 + ; CHECK: %reg1:gpr32 = COPY $w1 + ; CHECK: %f:gpr32 = COPY $w2 + ; CHECK: [[ANDSWri:%[0-9]+]]:gpr32 = ANDSWri %reg0, 0, implicit-def $nzcv + ; CHECK: %select:gpr32 = CSINVWr %reg1, %f, 0, implicit $nzcv + ; CHECK: $w0 = COPY %select + ; CHECK: RET_ReallyLR implicit $w0 + %reg0:gpr(s32) = COPY $w0 + %reg1:gpr(s32) = COPY $w1 + %cond:gpr(s1) = G_TRUNC %reg0(s32) + %f:gpr(s32) = COPY $w2 + %negative_one:gpr(s32) = G_CONSTANT i32 -1 + %xor:gpr(s32) = G_XOR %reg1(s32), %negative_one + %select:gpr(s32) = G_SELECT %cond(s1), %xor, %f + $w0 = COPY %select(s32) + RET_ReallyLR implicit $w0 + +... +--- name: csinv_s64 legalized: true regBankSelected: true