void Cfg::loopInvariantCodeMotion() {
TimerMarker T(TimerStack::TT_loopInvariantCodeMotion, this);
// Does not introduce new nodes as of now.
- for (auto &Pair : LoopInfo) {
- CfgNode *Header = Nodes[Pair.first];
+ for (auto &Loop : LoopInfo) {
+ CfgNode *Header = Loop.Header;
assert(Header);
if (Header->getLoopNestDepth() < 1)
- continue;
- CfgNode *PreHeader = nullptr;
- for (auto *Pred : Header->getInEdges()) {
- if (Pred->getLoopNestDepth() == Header->getLoopNestDepth() - 1) {
- if (PreHeader == nullptr) {
- PreHeader = Pred;
- } else {
- PreHeader = nullptr;
- break;
- // Do not consider cases with two incoming edges.
- // Will require insertion of nodes.
- }
- }
- }
+ return;
+ CfgNode *PreHeader = Loop.PreHeader;
if (PreHeader == nullptr || PreHeader->getInsts().size() == 0) {
- continue; // to next loop
+ return; // try next loop
}
auto &Insts = PreHeader->getInsts();
auto &LastInst = Insts.back();
Insts.pop_back();
- for (auto *Inst : findLoopInvariantInstructions(Pair.first)) {
+ for (auto *Inst : findLoopInvariantInstructions(Loop.Body)) {
PreHeader->appendInst(Inst);
}
PreHeader->appendInst(&LastInst);
}
}
-Ice::CfgVector<Inst *>
-Cfg::findLoopInvariantInstructions(Ice::SizeT LoopHeaderIndex) {
+CfgVector<Inst *>
+Cfg::findLoopInvariantInstructions(const CfgUnorderedSet<SizeT> &Body) {
CfgUnorderedSet<Inst *> InvariantInsts;
CfgUnorderedSet<Variable *> InvariantVars;
for (auto *Var : getArgs()) {
bool Changed = false;
do {
Changed = false;
- for (auto NodeIndex : LoopInfo[LoopHeaderIndex]) {
+ for (auto NodeIndex : Body) {
auto *Node = Nodes[NodeIndex];
CfgVector<std::reference_wrapper<Inst>> Insts(Node->getInsts().begin(),
Node->getInsts().end());
void Cfg::generateLoopInfo() {
TimerMarker T(TimerStack::TT_computeLoopNestDepth, this);
- LoopInfo = LoopAnalyzer(this).getLoopInfo();
+ LoopInfo = ComputeLoopInfo(this);
}
// This is a lightweight version of live-range-end calculation. Marks the last
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceGlobalContext.h"
+#include "IceLoopAnalyzer.h"
#include "IceStringPool.h"
#include "IceTypes.h"
uint32_t CombinedAlignment, InstList &Insts,
AllocaBaseVariableType BaseVariableType);
void findRematerializable();
- CfgVector<Inst *> findLoopInvariantInstructions(SizeT LoopHeaderIndex);
+ CfgVector<Inst *>
+ findLoopInvariantInstructions(const CfgUnorderedSet<SizeT> &Body);
GlobalContext *Ctx;
uint32_t SequenceNumber; /// output order for emission
/// Globals required by this CFG. Mostly used for the profiler's globals.
std::unique_ptr<VariableDeclarationList> GlobalInits;
CfgVector<InstJumpTable *> JumpTables;
- CfgUnorderedMap<SizeT, CfgVector<SizeT>> LoopInfo;
/// CurrentNode is maintained during dumping/emitting just for validating
/// Variable::DefNode. Normally, a traversal over CfgNodes maintains this, but
/// before global operations like register allocation, resetCurrentNode()
/// should be called to avoid spurious validation failures.
const CfgNode *CurrentNode = nullptr;
+ CfgVector<Loop> LoopInfo;
public:
static void TlsInit() { CfgAllocatorTraits::init(); }
#include "IceCfg.h"
#include "IceCfgNode.h"
+#include <algorithm>
+
namespace Ice {
+class LoopAnalyzer {
+public:
+ explicit LoopAnalyzer(Cfg *Func);
+
+ /// Use Tarjan's strongly connected components algorithm to identify outermost
+ /// to innermost loops. By deleting the head of the loop from the graph, inner
+ /// loops can be found. This assumes that the head node is not shared between
+ /// loops but instead all paths to the head come from 'continue' constructs.
+ ///
+ /// This only computes the loop nest depth within the function and does not
+ /// take into account whether the function was called from within a loop.
+ // TODO(ascull): this currently uses a extension of Tarjan's algorithm with
+ // is bounded linear. ncbray suggests another algorithm which is linear in
+ // practice but not bounded linear. I think it also finds dominators.
+ // http://lenx.100871.net/papers/loop-SAS.pdf
+
+ CfgVector<CfgUnorderedSet<SizeT>> getLoopBodies() { return Loops; }
+
+private:
+ LoopAnalyzer() = delete;
+ LoopAnalyzer(const LoopAnalyzer &) = delete;
+ LoopAnalyzer &operator=(const LoopAnalyzer &) = delete;
+ void computeLoopNestDepth();
+
+ using IndexT = uint32_t;
+ static constexpr IndexT UndefinedIndex = 0;
+ static constexpr IndexT FirstDefinedIndex = 1;
+
+ // TODO(ascull): classify the other fields
+ class LoopNode {
+ LoopNode() = delete;
+ LoopNode operator=(const LoopNode &) = delete;
+
+ public:
+ explicit LoopNode(CfgNode *BB) : BB(BB) { reset(); }
+ LoopNode(const LoopNode &) = default;
+
+ void reset();
+
+ NodeList::const_iterator successorsEnd() const;
+ NodeList::const_iterator currentSuccessor() const { return Succ; }
+ void nextSuccessor() { ++Succ; }
+
+ void visit(IndexT VisitIndex) { Index = LowLink = VisitIndex; }
+ bool isVisited() const { return Index != UndefinedIndex; }
+ IndexT getIndex() const { return Index; }
+
+ void tryLink(IndexT NewLink) {
+ if (NewLink < LowLink)
+ LowLink = NewLink;
+ }
+ IndexT getLowLink() const { return LowLink; }
+
+ void setOnStack(bool NewValue = true) { OnStack = NewValue; }
+ bool isOnStack() const { return OnStack; }
+
+ void setDeleted() { Deleted = true; }
+ bool isDeleted() const { return Deleted; }
+
+ void incrementLoopNestDepth();
+ bool hasSelfEdge() const;
+ CfgNode *getNode() { return BB; }
+
+ private:
+ CfgNode *BB;
+ NodeList::const_iterator Succ;
+ IndexT Index;
+ IndexT LowLink;
+ bool OnStack;
+ bool Deleted = false;
+ };
+
+ using LoopNodeList = CfgVector<LoopNode>;
+ using LoopNodePtrList = CfgVector<LoopNode *>;
+
+ /// Process the node as part as part of Tarjan's algorithm and return either a
+ /// node to recurse into or nullptr when the node has been fully processed.
+ LoopNode *processNode(LoopNode &Node);
+
+ /// The function to analyze for loops.
+ Cfg *const Func;
+ /// A list of decorated nodes in the same order as Func->getNodes() which
+ /// means the node's index will also be valid in this list.
+ LoopNodeList AllNodes;
+ /// This is used as a replacement for the call stack.
+ LoopNodePtrList WorkStack;
+ /// Track which loop a node belongs to.
+ LoopNodePtrList LoopStack;
+ /// The index to assign to the next visited node.
+ IndexT NextIndex = FirstDefinedIndex;
+ /// The number of nodes which have been marked deleted. This is used to track
+ /// when the iteration should end.
+ LoopNodePtrList::size_type NumDeletedNodes = 0;
+
+ /// All the Loops, in descending order of size
+ CfgVector<CfgUnorderedSet<SizeT>> Loops;
+};
void LoopAnalyzer::LoopNode::reset() {
if (Deleted)
return;
if (*It == &Node) {
(*It)->setDeleted();
++NumDeletedNodes;
- CfgVector<SizeT> LoopNodes;
+ CfgUnorderedSet<SizeT> LoopNodes;
for (auto LoopIter = It.base() - 1; LoopIter != LoopStack.end();
++LoopIter) {
- LoopNodes.push_back((*LoopIter)->getNode()->getIndex());
+ LoopNodes.insert((*LoopIter)->getNode()->getIndex());
}
- Loops[(*It)->getNode()->getIndex()] = LoopNodes;
+ Loops.push_back(LoopNodes);
LoopStack.erase(It.base() - 1, LoopStack.end());
break;
}
return nullptr;
}
+CfgVector<Loop> ComputeLoopInfo(Cfg *Func) {
+ auto LoopBodies = LoopAnalyzer(Func).getLoopBodies();
+
+ CfgVector<Loop> Loops;
+ Loops.reserve(LoopBodies.size());
+ std::sort(
+ LoopBodies.begin(), LoopBodies.end(),
+ [](const CfgUnorderedSet<SizeT> &A, const CfgUnorderedSet<SizeT> &B) {
+ return A.size() > B.size();
+ });
+ for (auto &LoopBody : LoopBodies) {
+ CfgNode *Header = nullptr;
+ bool IsSimpleLoop = true;
+ for (auto NodeIndex : LoopBody) {
+ CfgNode *Cur = Func->getNodes()[NodeIndex];
+ for (auto *Prev : Cur->getInEdges()) {
+ if (LoopBody.find(Prev->getIndex()) ==
+ LoopBody.end()) { // coming from outside
+ if (Header == nullptr) {
+ Header = Cur;
+ } else {
+ Header = nullptr;
+ IsSimpleLoop = false;
+ break;
+ }
+ }
+ }
+ if (!IsSimpleLoop) {
+ break;
+ }
+ }
+ if (!IsSimpleLoop)
+ continue; // To next potential loop
+
+ CfgNode *PreHeader = nullptr;
+ for (auto *Prev : Header->getInEdges()) {
+ if (LoopBody.find(Prev->getIndex()) == LoopBody.end()) {
+ if (PreHeader == nullptr) {
+ PreHeader = Prev;
+ } else {
+ PreHeader = nullptr;
+ break;
+ }
+ }
+ }
+
+ Loops.emplace_back(Header, PreHeader, LoopBody);
+ }
+ return Loops;
+}
} // end of namespace Ice
namespace Ice {
-/// Analyze a function's CFG for loops. The CFG must not change during the
-/// lifetime of this object.
-class LoopAnalyzer {
- LoopAnalyzer() = delete;
- LoopAnalyzer(const LoopAnalyzer &) = delete;
- LoopAnalyzer &operator=(const LoopAnalyzer &) = delete;
-
-public:
- explicit LoopAnalyzer(Cfg *Func);
-
- /// Use Tarjan's strongly connected components algorithm to identify outermost
- /// to innermost loops. By deleting the head of the loop from the graph, inner
- /// loops can be found. This assumes that the head node is not shared between
- /// loops but instead all paths to the head come from 'continue' constructs.
- ///
- /// This only computes the loop nest depth within the function and does not
- /// take into account whether the function was called from within a loop.
- // TODO(ascull): this currently uses a extension of Tarjan's algorithm with
- // is bounded linear. ncbray suggests another algorithm which is linear in
- // practice but not bounded linear. I think it also finds dominators.
- // http://lenx.100871.net/papers/loop-SAS.pdf
- CfgUnorderedMap<SizeT, CfgVector<SizeT>> getLoopInfo() { return Loops; }
-
-private:
- void computeLoopNestDepth();
- using IndexT = uint32_t;
- static constexpr IndexT UndefinedIndex = 0;
- static constexpr IndexT FirstDefinedIndex = 1;
-
- // TODO(ascull): classify the other fields
- class LoopNode {
- LoopNode() = delete;
- LoopNode operator=(const LoopNode &) = delete;
-
- public:
- explicit LoopNode(CfgNode *BB) : BB(BB) { reset(); }
- LoopNode(const LoopNode &) = default;
-
- void reset();
-
- NodeList::const_iterator successorsEnd() const;
- NodeList::const_iterator currentSuccessor() const { return Succ; }
- void nextSuccessor() { ++Succ; }
-
- void visit(IndexT VisitIndex) { Index = LowLink = VisitIndex; }
- bool isVisited() const { return Index != UndefinedIndex; }
- IndexT getIndex() const { return Index; }
-
- void tryLink(IndexT NewLink) {
- if (NewLink < LowLink)
- LowLink = NewLink;
- }
- IndexT getLowLink() const { return LowLink; }
-
- void setOnStack(bool NewValue = true) { OnStack = NewValue; }
- bool isOnStack() const { return OnStack; }
-
- void setDeleted() { Deleted = true; }
- bool isDeleted() const { return Deleted; }
-
- void incrementLoopNestDepth();
- bool hasSelfEdge() const;
-
- CfgNode *getNode() { return BB; }
-
- private:
- CfgNode *BB;
- NodeList::const_iterator Succ;
- IndexT Index;
- IndexT LowLink;
- bool OnStack;
- bool Deleted = false;
- };
-
- using LoopNodeList = CfgVector<LoopNode>;
- using LoopNodePtrList = CfgVector<LoopNode *>;
-
- /// Process the node as part as part of Tarjan's algorithm and return either a
- /// node to recurse into or nullptr when the node has been fully processed.
- LoopNode *processNode(LoopNode &Node);
-
- /// The function to analyze for loops.
- Cfg *const Func;
- /// A list of decorated nodes in the same order as Func->getNodes() which
- /// means the node's index will also be valid in this list.
- LoopNodeList AllNodes;
- /// This is used as a replacement for the call stack.
- LoopNodePtrList WorkStack;
- /// Track which loop a node belongs to.
- LoopNodePtrList LoopStack;
- /// The index to assign to the next visited node.
- IndexT NextIndex = FirstDefinedIndex;
- /// The number of nodes which have been marked deleted. This is used to track
- /// when the iteration should end.
- LoopNodePtrList::size_type NumDeletedNodes = 0;
- /// Detailed loop information
- CfgUnorderedMap<SizeT, CfgVector<SizeT>> Loops;
+struct Loop {
+ Loop(CfgNode *Header, CfgNode *PreHeader, CfgUnorderedSet<SizeT> Body)
+ : Header(Header), PreHeader(PreHeader), Body(Body) {}
+ CfgNode *Header;
+ CfgNode *PreHeader;
+ CfgUnorderedSet<SizeT> Body; // Node IDs
};
+CfgVector<Loop> ComputeLoopInfo(Cfg *Func);
+
} // end of namespace Ice
#endif // SUBZERO_SRC_ICELOOPANALYZER_H