From 14e60218b6893cf4ef3487e92a8b2c252d114e66 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 14 Jul 2015 17:07:24 +0000 Subject: [PATCH] [Hexagon] Generate "extract" instructions more aggressively Generate extract instructions (via intrinsics) before the DAG combiner folds shifts into unrecognizable forms. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@242163 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/CMakeLists.txt | 1 + lib/Target/Hexagon/HexagonGenExtract.cpp | 259 ++++++++++++++++++++++++++++ lib/Target/Hexagon/HexagonTargetMachine.cpp | 31 ++-- test/CodeGen/Hexagon/extract-basic.ll | 76 ++++++++ test/CodeGen/Hexagon/fusedandshift.ll | 3 +- 5 files changed, 356 insertions(+), 14 deletions(-) create mode 100644 lib/Target/Hexagon/HexagonGenExtract.cpp create mode 100644 test/CodeGen/Hexagon/extract-basic.ll diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index 6a5f5f93026..33d87d15a86 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -22,6 +22,7 @@ add_llvm_target(HexagonCodeGen HexagonExpandPredSpillCode.cpp HexagonFixupHwLoops.cpp HexagonFrameLowering.cpp + HexagonGenExtract.cpp HexagonGenInsert.cpp HexagonHardwareLoops.cpp HexagonInstrInfo.cpp diff --git a/lib/Target/Hexagon/HexagonGenExtract.cpp b/lib/Target/Hexagon/HexagonGenExtract.cpp new file mode 100644 index 00000000000..4d32208bd5a --- /dev/null +++ b/lib/Target/Hexagon/HexagonGenExtract.cpp @@ -0,0 +1,259 @@ +//===--- HexagonGenExtract.cpp --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt ExtractCutoff("extract-cutoff", cl::init(~0U), + cl::Hidden, cl::desc("Cutoff for generating \"extract\"" + " instructions")); + +// This prevents generating extract instructions that have the offset of 0. +// One of the reasons for "extract" is to put a sequence of bits in a regis- +// ter, starting at offset 0 (so that these bits can then be used by an +// "insert"). If the bits are already at offset 0, it is better not to gene- +// rate "extract", since logical bit operations can be merged into compound +// instructions (as opposed to "extract"). 
+static cl::opt NoSR0("extract-nosr0", cl::init(true), cl::Hidden, + cl::desc("No extract instruction with offset 0")); + +static cl::opt NeedAnd("extract-needand", cl::init(true), cl::Hidden, + cl::desc("Require & in extract patterns")); + +namespace llvm { + void initializeHexagonGenExtractPass(PassRegistry&); + FunctionPass *createHexagonGenExtract(); +} + + +namespace { + class HexagonGenExtract : public FunctionPass { + public: + static char ID; + HexagonGenExtract() : FunctionPass(ID), ExtractCount(0) { + initializeHexagonGenExtractPass(*PassRegistry::getPassRegistry()); + } + virtual const char *getPassName() const override { + return "Hexagon generate \"extract\" instructions"; + } + virtual bool runOnFunction(Function &F) override; + virtual void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + FunctionPass::getAnalysisUsage(AU); + } + private: + bool visitBlock(BasicBlock *B); + bool convert(Instruction *In); + + unsigned ExtractCount; + DominatorTree *DT; + }; + + char HexagonGenExtract::ID = 0; +} + +INITIALIZE_PASS_BEGIN(HexagonGenExtract, "hextract", "Hexagon generate " + "\"extract\" instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(HexagonGenExtract, "hextract", "Hexagon generate " + "\"extract\" instructions", false, false) + + +bool HexagonGenExtract::convert(Instruction *In) { + using namespace PatternMatch; + Value *BF = 0; + ConstantInt *CSL = 0, *CSR = 0, *CM = 0; + BasicBlock *BB = In->getParent(); + LLVMContext &Ctx = BB->getContext(); + bool LogicalSR; + + // (and (shl (lshr x, #sr), #sl), #m) + LogicalSR = true; + bool Match = match(In, m_And(m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)), + m_ConstantInt(CSL)), + m_ConstantInt(CM))); + + if (!Match) { + // (and (shl (ashr x, #sr), #sl), #m) + LogicalSR = false; + Match = match(In, m_And(m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)), + m_ConstantInt(CSL)), + 
m_ConstantInt(CM))); + } + if (!Match) { + // (and (shl x, #sl), #m) + LogicalSR = true; + CSR = ConstantInt::get(Type::getInt32Ty(Ctx), 0); + Match = match(In, m_And(m_Shl(m_Value(BF), m_ConstantInt(CSL)), + m_ConstantInt(CM))); + if (Match && NoSR0) + return false; + } + if (!Match) { + // (and (lshr x, #sr), #m) + LogicalSR = true; + CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0); + Match = match(In, m_And(m_LShr(m_Value(BF), m_ConstantInt(CSR)), + m_ConstantInt(CM))); + } + if (!Match) { + // (and (ashr x, #sr), #m) + LogicalSR = false; + CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0); + Match = match(In, m_And(m_AShr(m_Value(BF), m_ConstantInt(CSR)), + m_ConstantInt(CM))); + } + if (!Match) { + CM = 0; + // (shl (lshr x, #sr), #sl) + LogicalSR = true; + Match = match(In, m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)), + m_ConstantInt(CSL))); + } + if (!Match) { + CM = 0; + // (shl (ashr x, #sr), #sl) + LogicalSR = false; + Match = match(In, m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)), + m_ConstantInt(CSL))); + } + if (!Match) + return false; + + Type *Ty = BF->getType(); + if (!Ty->isIntegerTy()) + return false; + unsigned BW = Ty->getPrimitiveSizeInBits(); + if (BW != 32 && BW != 64) + return false; + + uint32_t SR = CSR->getZExtValue(); + uint32_t SL = CSL->getZExtValue(); + + if (!CM) { + // If there was no and, and the shift left did not remove all potential + // sign bits created by the shift right, then extractu cannot reproduce + // this value. + if (!LogicalSR && (SR > SL)) + return false; + APInt A = APInt(BW, ~0ULL).lshr(SR).shl(SL); + CM = ConstantInt::get(Ctx, A); + } + + // CM is the shifted-left mask. Shift it back right to remove the zero + // bits on least-significant positions. + APInt M = CM->getValue().lshr(SL); + uint32_t T = M.countTrailingOnes(); + + // During the shifts some of the bits will be lost. Calculate how many + // of the original value will remain after shift right and then left. 
+ uint32_t U = BW - std::max(SL, SR); + // The width of the extracted field is the minimum of the original bits + // that remain after the shifts and the number of contiguous 1s in the mask. + uint32_t W = std::min(U, T); + if (W == 0) + return false; + + // Check if the extracted bits are contained within the mask that it is + // and-ed with. The extract operation will copy these bits, and so the + // mask cannot have any holes in it that would clear any of the bits of the + // extracted field. + if (!LogicalSR) { + // If the shift right was arithmetic, it could have included some 1 bits. + // It is still ok to generate extract, but only if the mask eliminates + // those bits (i.e. M does not have any bits set beyond U). + APInt C = APInt::getHighBitsSet(BW, BW-U); + if (M.intersects(C) || !APIntOps::isMask(W, M)) + return false; + } else { + // Check if M starts with a contiguous sequence of W times 1 bits. Get + // the low U bits of M (which eliminates the 0 bits shifted in on the + // left), and check if the result is APInt's "mask": + if (!APIntOps::isMask(W, M.getLoBits(U))) + return false; + } + + IRBuilder<> IRB(BB, In); + Intrinsic::ID IntId = (BW == 32) ? Intrinsic::hexagon_S2_extractu + : Intrinsic::hexagon_S2_extractup; + Module *Mod = BB->getParent()->getParent(); + Value *ExtF = Intrinsic::getDeclaration(Mod, IntId); + Value *NewIn = IRB.CreateCall(ExtF, {BF, IRB.getInt32(W), IRB.getInt32(SR)}); + if (SL != 0) + NewIn = IRB.CreateShl(NewIn, SL, CSL->getName()); + In->replaceAllUsesWith(NewIn); + return true; +} + + +bool HexagonGenExtract::visitBlock(BasicBlock *B) { + // Depth-first, bottom-up traversal. + DomTreeNode *DTN = DT->getNode(B); + typedef GraphTraits GTN; + typedef GTN::ChildIteratorType Iter; + for (Iter I = GTN::child_begin(DTN), E = GTN::child_end(DTN); I != E; ++I) + visitBlock((*I)->getBlock()); + + // Allow limiting the number of generated extracts for debugging purposes. 
+ bool HasCutoff = ExtractCutoff.getPosition(); + unsigned Cutoff = ExtractCutoff; + + bool Changed = false; + BasicBlock::iterator I = std::prev(B->end()), NextI, Begin = B->begin(); + while (true) { + if (HasCutoff && (ExtractCount >= Cutoff)) + return Changed; + bool Last = (I == Begin); + if (!Last) + NextI = std::prev(I); + Instruction *In = &*I; + bool Done = convert(In); + if (HasCutoff && Done) + ExtractCount++; + Changed |= Done; + if (Last) + break; + I = NextI; + } + return Changed; +} + + +bool HexagonGenExtract::runOnFunction(Function &F) { + DT = &getAnalysis().getDomTree(); + bool Changed; + + // Traverse the function bottom-up, to see super-expressions before their + // sub-expressions. + BasicBlock *Entry = GraphTraits::getEntryNode(&F); + Changed = visitBlock(Entry); + + return Changed; +} + + +FunctionPass *llvm::createHexagonGenExtract() { + return new HexagonGenExtract(); +} diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 7d9d02d49f4..8ada43fbe4f 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -43,6 +43,8 @@ static cl::opt EnableGenInsert("hexagon-insert", cl::init(true), static cl::opt EnableCommGEP("hexagon-commgep", cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("Enable commoning of GEP instructions")); +static cl::opt EnableGenExtract("hexagon-extract", cl::init(true), + cl::Hidden, cl::desc("Generate \"extract\" instructions")); /// HexagonTargetMachineModule - Note that this is used on hosts that /// cannot link in a library unless there are references into the @@ -66,24 +68,22 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler", createVLIWMachineSched); namespace llvm { + FunctionPass *createHexagonCFGOptimizer(); FunctionPass *createHexagonCommonGEP(); + FunctionPass *createHexagonCopyToCombine(); FunctionPass *createHexagonExpandCondsets(); - FunctionPass 
*createHexagonISelDag(HexagonTargetMachine &TM, - CodeGenOpt::Level OptLevel); - FunctionPass *createHexagonDelaySlotFillerPass(const TargetMachine &TM); - FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM); - FunctionPass *createHexagonCFGOptimizer(); - - FunctionPass *createHexagonSplitConst32AndConst64(); FunctionPass *createHexagonExpandPredSpillCode(); + FunctionPass *createHexagonFixupHwLoops(); + FunctionPass *createHexagonGenExtract(); FunctionPass *createHexagonGenInsert(); FunctionPass *createHexagonHardwareLoops(); - FunctionPass *createHexagonPeephole(); - FunctionPass *createHexagonFixupHwLoops(); + FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, + CodeGenOpt::Level OptLevel); FunctionPass *createHexagonNewValueJump(); - FunctionPass *createHexagonCopyToCombine(); FunctionPass *createHexagonPacketizer(); - FunctionPass *createHexagonNewValueJump(); + FunctionPass *createHexagonPeephole(); + FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM); + FunctionPass *createHexagonSplitConst32AndConst64(); } // end namespace llvm; /// HexagonTargetMachine ctor - Create an ILP32 architecture model. @@ -147,8 +147,13 @@ void HexagonPassConfig::addIRPasses() { bool NoOpt = (getOptLevel() == CodeGenOpt::None); addPass(createAtomicExpandPass(TM)); - if (!NoOpt && EnableCommGEP) - addPass(createHexagonCommonGEP()); + if (!NoOpt) { + if (EnableCommGEP) + addPass(createHexagonCommonGEP()); + // Replace certain combinations of shifts and ands with extracts. 
+ if (EnableGenExtract) + addPass(createHexagonGenExtract()); + } } bool HexagonPassConfig::addInstSelector() { diff --git a/test/CodeGen/Hexagon/extract-basic.ll b/test/CodeGen/Hexagon/extract-basic.ll new file mode 100644 index 00000000000..c75125cedd3 --- /dev/null +++ b/test/CodeGen/Hexagon/extract-basic.ll @@ -0,0 +1,76 @@ +; RUN: llc -O2 -march=hexagon < %s | FileCheck %s + +; CHECK-DAG: extractu(r{{[0-9]*}}, #3, #4) +; CHECK-DAG: extractu(r{{[0-9]*}}, #8, #7) +; CHECK-DAG: extractu(r{{[0-9]*}}, #8, #16) + +; C source: +; typedef struct { +; unsigned x1:3; +; unsigned x2:7; +; unsigned x3:8; +; unsigned x4:12; +; unsigned x5:2; +; } structx_t; +; +; typedef struct { +; unsigned y1:4; +; unsigned y2:3; +; unsigned y3:9; +; unsigned y4:8; +; unsigned y5:8; +; } structy_t; +; +; void foo(structx_t *px, structy_t *py) { +; px->x1 = py->y1; +; px->x2 = py->y2; +; px->x3 = py->y3; +; px->x4 = py->y4; +; px->x5 = py->y5; +; } + +target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32" +target triple = "hexagon" + +%struct.structx_t = type { i8, i8, i8, i8 } +%struct.structy_t = type { i8, i8, i8, i8 } + +define void @foo(%struct.structx_t* nocapture %px, %struct.structy_t* nocapture %py) nounwind { +entry: + %0 = bitcast %struct.structy_t* %py to i32* + %1 = load i32, i32* %0, align 4 + %bf.value = and i32 %1, 7 + %2 = bitcast %struct.structx_t* %px to i32* + %3 = load i32, i32* %2, align 4 + %4 = and i32 %3, -8 + %5 = or i32 %4, %bf.value + store i32 %5, i32* %2, align 4 + %6 = load i32, i32* %0, align 4 + %7 = lshr i32 %6, 4 + %bf.clear1 = shl nuw nsw i32 %7, 3 + %8 = and i32 %bf.clear1, 56 + %9 = and i32 %5, -1017 + %10 = or i32 %8, %9 + store i32 %10, i32* %2, align 4 + %11 = load i32, i32* %0, align 4 + %12 = lshr i32 %11, 7 + %bf.value4 = shl i32 %12, 10 + %13 = and i32 %bf.value4, 261120 + %14 = and i32 %10, -262081 + %15 = or i32 %14, %13 + store i32 %15, i32* %2, align 4 + %16 = load i32, 
i32* %0, align 4 + %17 = lshr i32 %16, 16 + %bf.clear5 = shl i32 %17, 18 + %18 = and i32 %bf.clear5, 66846720 + %19 = and i32 %15, -1073480641 + %20 = or i32 %19, %18 + store i32 %20, i32* %2, align 4 + %21 = load i32, i32* %0, align 4 + %22 = lshr i32 %21, 24 + %23 = shl i32 %22, 30 + %24 = and i32 %20, 67107903 + %25 = or i32 %24, %23 + store i32 %25, i32* %2, align 4 + ret void +} diff --git a/test/CodeGen/Hexagon/fusedandshift.ll b/test/CodeGen/Hexagon/fusedandshift.ll index 59a1e1d84fc..414574aec40 100644 --- a/test/CodeGen/Hexagon/fusedandshift.ll +++ b/test/CodeGen/Hexagon/fusedandshift.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; RUN: llc -march=hexagon -hexagon-extract=0 < %s | FileCheck %s ; Check that we generate fused logical and with shift instruction. +; Disable "extract" generation, since it may eliminate the and/lsr. ; CHECK: r{{[0-9]+}} = and(#15, lsr(r{{[0-9]+}}, #{{[0-9]+}}) -- 2.11.0