From b45bb28e1fe1e959de533be8e484f063f9f342cf Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek
Date: Tue, 28 Feb 2017 23:27:33 +0000
Subject: [PATCH] [Hexagon] Generate extract instructions more aggressively

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@296537 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Hexagon/HexagonBitSimplify.cpp | 205 +++++++++++++++++++++++++++++-
 test/CodeGen/Hexagon/bit-extract.ll       |  75 +++++++++++
 test/CodeGen/Hexagon/bit-validate-reg.ll  |   5 +-
 test/CodeGen/Hexagon/fusedandshift.ll     |   2 +-
 test/CodeGen/Hexagon/vect/vect-shuffle.ll |   2 +-
 5 files changed, 285 insertions(+), 4 deletions(-)
 create mode 100644 test/CodeGen/Hexagon/bit-extract.ll

diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp
index fda7f7fd884..79787463684 100644
--- a/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -46,6 +46,12 @@ using namespace llvm;
 
 static cl::opt<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden,
   cl::init(true), cl::desc("Preserve subregisters in tied operands"));
+static cl::opt<bool> GenExtract("hexbit-extract", cl::Hidden,
+  cl::init(true), cl::desc("Generate extract instructions"));
+
+static cl::opt<unsigned> MaxExtract("hexbit-max-extract", cl::Hidden,
+  cl::init(UINT_MAX));
+static unsigned CountExtract = 0;
 
 namespace llvm {
 
@@ -1765,6 +1771,8 @@ namespace {
           const BitTracker::RegisterCell &RC);
     bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD,
           const BitTracker::RegisterCell &RC);
+    bool simplifyExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
+          const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
 
     const HexagonInstrInfo &HII;
     const HexagonRegisterInfo &HRI;
@@ -2208,6 +2216,199 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI,
   return false;
 }
 
+// Detect whether RD is a bitfield extract (sign- or zero-extended) of
+// some register from the AVs set. Create a new corresponding instruction
+// at the location of MI. The intent is to recognize situations where
+// a sequence of instructions performs an operation that is equivalent to
+// an extract operation, such as a shift left followed by a shift right.
+bool BitSimplification::simplifyExtractLow(MachineInstr *MI,
+      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC,
+      const RegisterSet &AVs) {
+  if (!GenExtract)
+    return false;
+  if (CountExtract >= MaxExtract)
+    return false;
+  CountExtract++;
+
+  unsigned W = RC.width();
+  unsigned RW = W;
+  unsigned Len;
+  bool Signed;
+
+  // The code is mostly class-independent, except for the part that generates
+  // the extract instruction, and establishes the source register (in case it
+  // needs to use a subregister).
+  const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
+  if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
+    return false;
+  assert(RD.Sub == 0);
+
+  // Observation:
+  // If the cell has a form of 00..0xx..x with k zeros and n remaining
+  // bits, this could be an extractu of the n bits, but it could also be
+  // an extractu of a longer field which happens to have 0s in the top
+  // bit positions.
+  // The same logic applies to sign-extended fields.
+  //
+  // Do not check for the extended extracts, since it would expand the
+  // search space quite a bit. The search may be expensive as it is.
+
+  const BitTracker::BitValue &TopV = RC[W-1];
+
+  // Eliminate candidates that have self-referential bits, since they
+  // cannot be extracts from other registers. Also, skip registers that
+  // have compile-time constant values.
+  bool IsConst = true;
+  for (unsigned I = 0; I != W; ++I) {
+    const BitTracker::BitValue &V = RC[I];
+    if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == RD.Reg)
+      return false;
+    IsConst = IsConst && (V.is(0) || V.is(1));
+  }
+  if (IsConst)
+    return false;
+
+  if (TopV.is(0) || TopV.is(1)) {
+    bool S = TopV.is(1);
+    for (--W; W > 0 && RC[W-1].is(S); --W)
+      ;
+    Len = W;
+    Signed = S;
+    // The sign bit must be a part of the field being extended.
+    if (Signed)
+      ++Len;
+  } else {
+    // This could still be a sign-extended extract.
+    assert(TopV.Type == BitTracker::BitValue::Ref);
+    if (TopV.RefI.Reg == RD.Reg || TopV.RefI.Pos == W-1)
+      return false;
+    for (--W; W > 0 && RC[W-1] == TopV; --W)
+      ;
+    // The top bits of RC are copies of TopV. One occurrence of TopV will
+    // be a part of the field.
+    Len = W + 1;
+    Signed = true;
+  }
+
+  // This would be just a copy. It should be handled elsewhere.
+  if (Len == RW)
+    return false;
+
+  DEBUG({
+    dbgs() << __func__ << " on reg: " << PrintReg(RD.Reg, &HRI, RD.Sub)
+           << ", MI: " << *MI;
+    dbgs() << "Cell: " << RC << '\n';
+    dbgs() << "Expected bitfield size: " << Len << " bits, "
+           << (Signed ? "sign" : "zero") << "-extended\n";
+  });
+
+  bool Changed = false;
+
+  for (unsigned R = AVs.find_first(); R != 0; R = AVs.find_next(R)) {
+    const BitTracker::RegisterCell &SC = BT.lookup(R);
+    unsigned SW = SC.width();
+
+    // The source can be longer than the destination, as long as its size is
+    // a multiple of the size of the destination. Also, we would need to be
+    // able to refer to the subregister in the source that would be of the
+    // same size as the destination, but only check the sizes here.
+    if (SW < RW || (SW % RW) != 0)
+      continue;
+
+    // The field can start at any offset in SC as long as it contains Len
+    // bits and does not cross subregister boundary (if the source register
+    // is longer than the destination).
+    unsigned Off = 0;
+    while (Off <= SW-Len) {
+      unsigned OE = (Off+Len)/RW;
+      if (OE != Off/RW) {
+        // The assumption here is that if the source (R) is longer than the
+        // destination, then the destination is a sequence of words of
+        // size RW, and each such word in R can be accessed via a subregister.
+        //
+        // If the beginning and the end of the field cross the subregister
+        // boundary, advance to the next subregister.
+        Off = OE*RW;
+        continue;
+      }
+      if (HBS::isEqual(RC, 0, SC, Off, Len))
+        break;
+      ++Off;
+    }
+
+    if (Off > SW-Len)
+      continue;
+
+    // Found match.
+    unsigned ExtOpc = 0;
+    if (Off == 0) {
+      if (Len == 8)
+        ExtOpc = Signed ? Hexagon::A2_sxtb : Hexagon::A2_zxtb;
+      else if (Len == 16)
+        ExtOpc = Signed ? Hexagon::A2_sxth : Hexagon::A2_zxth;
+      else if (Len < 10 && !Signed)
+        ExtOpc = Hexagon::A2_andir;
+    }
+    if (ExtOpc == 0) {
+      ExtOpc =
+          Signed ? (RW == 32 ? Hexagon::S4_extract : Hexagon::S4_extractp)
+                 : (RW == 32 ? Hexagon::S2_extractu : Hexagon::S2_extractup);
+    }
+    unsigned SR = 0;
+    // This only recognizes isub_lo and isub_hi.
+    if (RW != SW && RW*2 != SW)
+      continue;
+    if (RW != SW)
+      SR = (Off/RW == 0) ? Hexagon::isub_lo : Hexagon::isub_hi;
+    // The extract offset is relative to the (sub)register that is actually
+    // used as the source operand, not to the start of the full register R.
+    Off = Off % RW;
+
+    if (!validateReg({R,SR}, ExtOpc, 1))
+      continue;
+
+    // Don't generate the same instruction as the one being optimized.
+    if (MI->getOpcode() == ExtOpc) {
+      // All possible ExtOpc's have the source in operand(1).
+      const MachineOperand &SrcOp = MI->getOperand(1);
+      if (SrcOp.getReg() == R)
+        continue;
+    }
+
+    DebugLoc DL = MI->getDebugLoc();
+    MachineBasicBlock &B = *MI->getParent();
+    unsigned NewR = MRI.createVirtualRegister(FRC);
+    auto MIB = BuildMI(B, MI, DL, HII.get(ExtOpc), NewR)
+                  .addReg(R, 0, SR);
+    switch (ExtOpc) {
+      case Hexagon::A2_sxtb:
+      case Hexagon::A2_zxtb:
+      case Hexagon::A2_sxth:
+      case Hexagon::A2_zxth:
+        break;
+      case Hexagon::A2_andir:
+        MIB.addImm((1u << Len) - 1);
+        break;
+      case Hexagon::S4_extract:
+      case Hexagon::S2_extractu:
+      case Hexagon::S4_extractp:
+      case Hexagon::S2_extractup:
+        MIB.addImm(Len)
+           .addImm(Off);
+        break;
+      default:
+        llvm_unreachable("Unexpected opcode");
+    }
+
+    HBS::replaceReg(RD.Reg, NewR, MRI);
+    BT.put(BitTracker::RegisterRef(NewR), RC);
+    Changed = true;
+    break;
+  }
+
+  return Changed;
+}
+
 bool BitSimplification::processBlock(MachineBasicBlock &B,
       const RegisterSet &AVs) {
   if (!BT.reached(&B))
@@ -2245,12 +2446,14 @@ bool BitSimplification::processBlock(MachineBasicBlock &B,
 
     if (FRC->getID() == Hexagon::DoubleRegsRegClassID) {
       bool T = genPackhl(MI, RD, RC);
+      T = T || simplifyExtractLow(MI, RD, RC, AVB);
       Changed |= T;
       continue;
     }
 
     if (FRC->getID() == Hexagon::IntRegsRegClassID) {
-      bool T = genExtractHalf(MI, RD, RC);
+      bool T = simplifyExtractLow(MI, RD, RC, AVB);
+      T = T || genExtractHalf(MI, RD, RC);
       T = T || genCombineHalf(MI, RD, RC);
       T = T || genExtractLow(MI, RD, RC);
       Changed |= T;
diff --git a/test/CodeGen/Hexagon/bit-extract.ll b/test/CodeGen/Hexagon/bit-extract.ll
new file mode 100644
index 00000000000..ad7d05d2c23
--- /dev/null
+++ b/test/CodeGen/Hexagon/bit-extract.ll
@@ -0,0 +1,75 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+target triple = "hexagon"
+
+; CHECK-LABEL: ua
+; CHECK: extractu(r0,#26,#0)
+define i32 @ua(i32 %x) local_unnamed_addr #0 {
+entry:
+  %shl = and i32 %x, 67108863
+  ret i32 %shl
+}
+
+; CHECK-LABEL: ub
+; CHECK: extractu(r0,#16,#4)
+define i32 @ub(i32 %x) local_unnamed_addr #0 {
+entry:
+  %0 = lshr i32 %x, 4
+  %shr = and i32 %0, 65535
+  ret i32 %shr
+}
+
+; CHECK-LABEL: uc
+; CHECK: extractu(r0,#24,#0)
+define i32 @uc(i32 %x) local_unnamed_addr #0 {
+entry:
+  %shl = and i32 %x, 16777215
+  ret i32 %shl
+}
+
+; CHECK-LABEL: ud
+; CHECK: extractu(r0,#16,#8)
+define i32 @ud(i32 %x) local_unnamed_addr #0 {
+entry:
+  %bf.lshr = lshr i32 %x, 8
+  %bf.clear = and i32 %bf.lshr, 65535
+  ret i32 %bf.clear
+}
+
+; CHECK-LABEL: sa
+; CHECK: extract(r0,#26,#0)
+define i32 @sa(i32 %x) local_unnamed_addr #0 {
+entry:
+  %shl = shl i32 %x, 6
+  %shr = ashr exact i32 %shl, 6
+  ret i32 %shr
+}
+
+; CHECK-LABEL: sb
+; CHECK: extract(r0,#16,#4)
+define i32 @sb(i32 %x) local_unnamed_addr #0 {
+entry:
+  %shl = shl i32 %x, 12
+  %shr = ashr i32 %shl, 16
+  ret i32 %shr
+}
+
+; CHECK-LABEL: sc
+; CHECK: extract(r0,#24,#0)
+define i32 @sc(i32 %x) local_unnamed_addr #0 {
+entry:
+  %shl = shl i32 %x, 8
+  %shr = ashr exact i32 %shl, 8
+  ret i32 %shr
+}
+
+; CHECK-LABEL: sd
+; CHECK: extract(r0,#16,#8)
+define i32 @sd(i32 %x) local_unnamed_addr #0 {
+entry:
+  %bf.shl = shl i32 %x, 8
+  %bf.ashr = ashr i32 %bf.shl, 16
+  ret i32 %bf.ashr
+}
+
+attributes #0 = { noinline norecurse nounwind readnone "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double,-long-calls" }
diff --git a/test/CodeGen/Hexagon/bit-validate-reg.ll b/test/CodeGen/Hexagon/bit-validate-reg.ll
index 16d4a5e4484..42eed97786c 100644
--- a/test/CodeGen/Hexagon/bit-validate-reg.ll
+++ b/test/CodeGen/Hexagon/bit-validate-reg.ll
@@ -1,10 +1,13 @@
-; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon -hexbit-extract=0 < %s | FileCheck %s
 
 ; Make sure we don't generate zxtb to transfer a predicate register into
 ; a general purpose register.
 
 ; CHECK: r0 = p0
 ; CHECK-NOT: zxtb(p
+; CHECK-NOT: and(p
+; CHECK-NOT: extract(p
+; CHECK-NOT: extractu(p
 
 target triple = "hexagon"
 
diff --git a/test/CodeGen/Hexagon/fusedandshift.ll b/test/CodeGen/Hexagon/fusedandshift.ll
index 0310d440ffe..9abd366e691 100644
--- a/test/CodeGen/Hexagon/fusedandshift.ll
+++ b/test/CodeGen/Hexagon/fusedandshift.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -hexagon-extract=0 < %s | FileCheck %s
+; RUN: llc -march=hexagon -hexagon-extract=0 -hexbit-extract=0 < %s | FileCheck %s
 ; Check that we generate fused logical and with shift instruction.
 ; Disable "extract" generation, since it may eliminate the and/lsr.
 
diff --git a/test/CodeGen/Hexagon/vect/vect-shuffle.ll b/test/CodeGen/Hexagon/vect/vect-shuffle.ll
index bd5b2b98169..27840bbd28d 100644
--- a/test/CodeGen/Hexagon/vect/vect-shuffle.ll
+++ b/test/CodeGen/Hexagon/vect/vect-shuffle.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -march=hexagon -mcpu=hexagonv5 -disable-hsdr < %s | FileCheck %s
 
 ; Check that store is post-incremented.
-; CHECK-NOT: extractu
+; CHECK-NOT: extractu(r{{[0-9]+}},#32,
 ; CHECK-NOT: insert
 target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
 target triple = "hexagon"
-- 
2.11.0