From c8d11e859317afe754a32b972966db56544b8378 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson
Date: Fri, 29 Jan 2016 17:22:43 +0000
Subject: [PATCH] Temporarily revert "[ScheduleDAGInstrs::buildSchedGraph()]
 Handling of memory dependecies rewritten."

Some buildbot failures need to be debugged.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259213 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/CodeGen/PseudoSourceValue.h |   4 -
 include/llvm/CodeGen/ScheduleDAG.h       |  11 -
 include/llvm/CodeGen/ScheduleDAGInstrs.h |  70 +--
 lib/CodeGen/ScheduleDAGInstrs.cpp        | 723 +++++++++++++++----------------
 4 files changed, 351 insertions(+), 457 deletions(-)

diff --git a/include/llvm/CodeGen/PseudoSourceValue.h b/include/llvm/CodeGen/PseudoSourceValue.h
index c3f6fde9fb3..f67552030db 100644
--- a/include/llvm/CodeGen/PseudoSourceValue.h
+++ b/include/llvm/CodeGen/PseudoSourceValue.h
@@ -27,8 +27,6 @@ class MachineMemOperand;
 class raw_ostream;
 
 raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MMO);
-class PseudoSourceValue;
-raw_ostream &operator<<(raw_ostream &OS, const PseudoSourceValue* PSV);
 
 /// Special value supplied for machine level alias analysis. It indicates that
 /// a memory access references the functions stack frame (e.g., a spill slot),
@@ -47,8 +45,6 @@ public:
 
 private:
   PSVKind Kind;
-  friend raw_ostream &llvm::operator<<(raw_ostream &OS,
-                                       const PseudoSourceValue* PSV);
 
   friend class MachineMemOperand; // For printCustom().
 
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 6469cabd3de..bda9dbd51ff 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -396,17 +396,6 @@ namespace llvm {
     /// specified node.
     bool addPred(const SDep &D, bool Required = true);
 
-    /// addPredBarrier - This adds a barrier edge to SU by calling
-    /// addPred(), with latency 0 generally or latency 1 for a store
-    /// followed by a load.
-    bool addPredBarrier(SUnit *SU) {
-      SDep Dep(SU, SDep::Barrier);
-      unsigned TrueMemOrderLatency =
-        ((SU->getInstr()->mayStore() && this->getInstr()->mayLoad()) ? 1 : 0);
-      Dep.setLatency(TrueMemOrderLatency);
-      return addPred(Dep);
-    }
-
     /// removePred - This removes the specified edge as a pred of the current
     /// node if it exists. It also removes the current node as a successor of
     /// the specified node.
diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h
index a83a5158689..70537d1895f 100644
--- a/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -15,14 +15,12 @@
 #ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
 #define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
 
-#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SparseMultiSet.h"
 #include "llvm/ADT/SparseSet.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include <list>
 
 namespace llvm {
   class MachineFrameInfo;
@@ -86,10 +84,6 @@ namespace llvm {
   typedef SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>
     VReg2SUnitOperIdxMultiMap;
 
-  typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
-  typedef SmallVector<PointerIntPair<ValueType, 1, bool>, 4>
-  UnderlyingObjectsVector;
-
   /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
   /// MachineInstrs.
   class ScheduleDAGInstrs : public ScheduleDAG {
@@ -155,66 +149,10 @@ class ScheduleDAGInstrs : public ScheduleDAG {
     /// Tracks the last instructions in this region using each virtual register.
     VReg2SUnitOperIdxMultiMap CurrentVRegUses;
 
-    AliasAnalysis *AAForDep;
-
-    /// Remember a generic side-effecting instruction as we proceed.
-    /// No other SU ever gets scheduled around it (except in the special
-    /// case of a huge region that gets reduced).
-    SUnit *BarrierChain;
-
-  public:
-
-    /// A list of SUnits, used in Value2SUsMap, during DAG construction.
-    /// Note: to gain speed it might be worth investigating an optimized
-    /// implementation of this data structure, such as a singly linked list
-    /// with a memory pool (SmallVector was tried but slow and SparseSet is not
-    /// applicable).
-    typedef std::list<SUnit *> SUList;
-  protected:
-    /// A map from ValueType to SUList, used during DAG construction,
-    /// as a means of remembering which SUs depend on which memory
-    /// locations.
-    class Value2SUsMap;
-
-    /// Remove in FIFO order some SUs from huge maps.
-    void reduceHugeMemNodeMaps(Value2SUsMap &stores,
-                               Value2SUsMap &loads, unsigned N);
-
-    /// Add a chain edge between SUa and SUb, but only if both AliasAnalysis
-    /// and Target fail to deny the dependency.
-    void addChainDependency(SUnit *SUa, SUnit *SUb,
-                            unsigned Latency = 0);
-
-    /// Add dependencies as needed from all SUs in list to SU.
-    void addChainDependencies(SUnit *SU, SUList &sus, unsigned Latency) {
-      for (auto *su : sus)
-        addChainDependency(SU, su, Latency);
-    }
-
-    /// Add dependencies as needed from all SUs in map, to SU.
-    void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap);
-
-    /// Add dependencies as needed to SU, from all SUs mapped to V.
-    void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap,
-                              ValueType V);
-
-    /// Add barrier chain edges from all SUs in map, and then clear
-    /// the map. This is equivalent to insertBarrierChain(), but
-    /// optimized for the common case where the new BarrierChain (a
-    /// global memory object) has a higher NodeNum than all SUs in
-    /// map. It is assumed BarrierChain has been set before calling
-    /// this.
-    void addBarrierChain(Value2SUsMap &map);
-
-    /// Insert a barrier chain in a huge region, far below current
-    /// SU. Add barrier chain edges from all SUs in map with higher
-    /// NodeNums than this new BarrierChain, and remove them from
-    /// map. It is assumed BarrierChain has been set before calling
-    /// this.
-    void insertBarrierChain(Value2SUsMap &map);
-
-    /// For an unanalyzable memory access, this Value is used in maps.
-    UndefValue *UnknownValue;
+    /// PendingLoads - Remember where unknown loads are after the most recent
+    /// unknown store, as we iterate. As with Defs and Uses, this is here
+    /// to minimize construction/destruction.
+    std::vector<SUnit *> PendingLoads;
 
     /// DbgValues - Remember instruction that precedes DBG_VALUE.
    /// These are generated by buildSchedGraph but persist so they can be
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 37695f12e3b..00a0b0fc33a 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -14,6 +14,7 @@
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/ADT/IntEqClasses.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -27,8 +28,6 @@
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/RegisterPressure.h"
 #include "llvm/CodeGen/ScheduleDFS.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Type.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -51,42 +50,12 @@ static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
 static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true),
     cl::desc("Enable use of TBAA during MI DAG construction"));
 
-// Note: the two options below might be used in tuning compile time vs
-// output quality. Setting HugeRegion so large that it will never be
-// reached means best-effort, but may be slow.
-
-// When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
-// together hold this many SUs, a reduction of maps will be done.
-static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
-    cl::init(1000), cl::desc("The limit to use while constructing the DAG "
-                             "prior to scheduling, at which point a trade-off "
-                             "is made to avoid excessive compile time."));
-
-static cl::opt<unsigned> ReductionSize("dag-maps-reduction-size", cl::Hidden,
-    cl::desc("A huge scheduling region will have maps reduced by this many "
-             "nodes at a time. Defaults to HugeRegion / 2."));
-
-static void dumpSUList(ScheduleDAGInstrs::SUList &L) {
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-  dbgs() << "{ ";
-  for (auto *su : L) {
-    dbgs() << "SU(" << su->NodeNum << ")";
-    if (su != L.back())
-      dbgs() << ", ";
-  }
-  dbgs() << "}\n";
-#endif
-}
-
 ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                      const MachineLoopInfo *mli,
                                      bool RemoveKillFlags)
     : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
       RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false),
-      TrackLaneMasks(false), AAForDep(nullptr), BarrierChain(nullptr),
-      UnknownValue(UndefValue::get(
-                     Type::getVoidTy(mf.getFunction()->getContext()))),
-      FirstDbgValue(nullptr) {
+      TrackLaneMasks(false), FirstDbgValue(nullptr) {
   DbgValues.clear();
 
   const TargetSubtargetInfo &ST = mf.getSubtarget();
@@ -152,6 +121,10 @@ static void getUnderlyingObjects(const Value *V,
   } while (!Working.empty());
 }
 
+typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
+typedef SmallVector<PointerIntPair<ValueType, 1, bool>, 4>
+UnderlyingObjectsVector;
+
 /// getUnderlyingObjectsForInstr - If this machine instr has memory reference
 /// information and it can be tracked to a normal reference to a known
 /// object, return the Value for that object.
@@ -571,31 +544,41 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI,
     return true;
   }
 
-  if ((*MI->memoperands_begin())->getValue() == nullptr)
+  const Value *V = (*MI->memoperands_begin())->getValue();
+  if (!V)
     return true;
 
+  SmallVector<Value *, 4> Objs;
+  getUnderlyingObjects(V, Objs, DL);
+  for (Value *V : Objs) {
+    // Does this pointer refer to a distinct and identifiable object?
+    if (!isIdentifiedObject(V))
+      return true;
+  }
+
   return false;
 }
 
 /// This returns true if the two MIs need a chain edge between them.
-/// This is called on normal stores and loads.
+/// If these are not even memory operations, we still may need
+/// chain deps between them. The question really is - could
+/// these two MIs be reordered during scheduling from memory dependency
+/// point of view.
 static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                              const DataLayout &DL, MachineInstr *MIa,
                              MachineInstr *MIb) {
   const MachineFunction *MF = MIa->getParent()->getParent();
   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
 
-  assert ((MIa->mayStore() || MIb->mayStore()) &&
-          "Dependency checked between two loads");
-
-  // buildSchedGraph() will clear list of stores if not using AA,
-  // which means all stores have to be chained without AA.
-  if (!AA && MIa->mayStore() && MIb->mayStore())
-    return true;
-
-  // Let the target decide if memory accesses cannot possibly overlap.
-  if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
+  // Cover a trivial case - no edge is needed to itself.
+  if (MIa == MIb)
     return false;
+
+  // Let the target decide if memory accesses cannot possibly overlap.
+  if ((MIa->mayLoad() || MIa->mayStore()) &&
+      (MIb->mayLoad() || MIb->mayStore()))
+    if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
+      return false;
 
   // FIXME: Need to handle multiple memory operands to support all targets.
   if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
@@ -604,6 +587,11 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
   if (isUnsafeMemoryObject(MIa, MFI, DL) || isUnsafeMemoryObject(MIb, MFI, DL))
     return true;
 
+  // If we are dealing with two "normal" loads, we do not need an edge
+  // between them - they could be reordered.
+  if (!MIa->mayStore() && !MIb->mayStore())
+    return false;
+
   // To this point analysis is generic. From here on we do need AA.
   if (!AA)
     return true;
@@ -646,15 +634,106 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
   return (AAResult != NoAlias);
 }
 
-/// Check whether two objects need a chain edge and add it if needed.
-void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
-                                            unsigned Latency) {
-  if (MIsNeedChainEdge(AAForDep, MFI, MF.getDataLayout(), SUa->getInstr(),
-                       SUb->getInstr())) {
-    SDep Dep(SUa, SDep::MayAliasMem);
-    Dep.setLatency(Latency);
+/// This recursive function iterates over chain deps of SUb looking for
+/// "latest" node that needs a chain edge to SUa.
+static unsigned iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+                                 const DataLayout &DL, SUnit *SUa, SUnit *SUb,
+                                 SUnit *ExitSU, unsigned *Depth,
+                                 SmallPtrSetImpl<const SUnit*> &Visited) {
+  if (!SUa || !SUb || SUb == ExitSU)
+    return *Depth;
+
+  // Remember visited nodes.
+  if (!Visited.insert(SUb).second)
+    return *Depth;
+  // If there is _some_ dependency already in place, do not
+  // descend any further.
+  // TODO: Need to make sure that if that dependency got eliminated or ignored
+  // for any reason in the future, we would not violate DAG topology.
+  // Currently it does not happen, but makes an implicit assumption about
+  // future implementation.
+  //
+  // Independently, if we encounter node that is some sort of global
+  // object (like a call) we already have full set of dependencies to it
+  // and we can stop descending.
+  if (SUa->isSucc(SUb) ||
+      isGlobalMemoryObject(AA, SUb->getInstr()))
+    return *Depth;
+
+  // If we do need an edge, or we have exceeded depth budget,
+  // add that edge to the predecessors chain of SUb,
+  // and stop descending.
+  if (*Depth > 200 ||
+      MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
+    SUb->addPred(SDep(SUa, SDep::MayAliasMem));
+    return *Depth;
+  }
+  // Track current depth.
+  (*Depth)++;
+  // Iterate over memory dependencies only.
+  for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
+       I != E; ++I)
+    if (I->isNormalMemoryOrBarrier())
+      iterateChainSucc(AA, MFI, DL, SUa, I->getSUnit(), ExitSU, Depth, Visited);
+  return *Depth;
+}
+
+/// This function assumes that "downward" from SU there exist
+/// tail/leaf of already constructed DAG. It iterates downward and
+/// checks whether SU can be aliasing any node dominated
+/// by it.
+static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+                            const DataLayout &DL, SUnit *SU, SUnit *ExitSU,
+                            std::set<SUnit *> &CheckList,
+                            unsigned LatencyToLoad) {
+  if (!SU)
+    return;
+
+  SmallPtrSet<const SUnit*, 16> Visited;
+  unsigned Depth = 0;
+
+  for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end();
+       I != IE; ++I) {
+    if (SU == *I)
+      continue;
+    if (MIsNeedChainEdge(AA, MFI, DL, SU->getInstr(), (*I)->getInstr())) {
+      SDep Dep(SU, SDep::MayAliasMem);
+      Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
+      (*I)->addPred(Dep);
+    }
+
+    // Iterate recursively over all previously added memory chain
+    // successors. Keep track of visited nodes.
+    for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
+           JE = (*I)->Succs.end(); J != JE; ++J)
+      if (J->isNormalMemoryOrBarrier())
+        iterateChainSucc(AA, MFI, DL, SU, J->getSUnit(), ExitSU, &Depth,
+                         Visited);
+  }
+}
+
+/// Check whether two objects need a chain edge, if so, add it
+/// otherwise remember the rejected SU.
+static inline void addChainDependency(AliasAnalysis *AA,
+                                      const MachineFrameInfo *MFI,
+                                      const DataLayout &DL, SUnit *SUa,
+                                      SUnit *SUb, std::set<SUnit *> &RejectList,
+                                      unsigned TrueMemOrderLatency = 0,
+                                      bool isNormalMemory = false) {
+  // If this is a false dependency,
+  // do not add the edge, but remember the rejected node.
+  if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
+    SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
+    Dep.setLatency(TrueMemOrderLatency);
     SUb->addPred(Dep);
   }
+  else {
+    // Duplicate entries should be ignored.
+    RejectList.insert(SUb);
+    DEBUG(dbgs() << "\tReject chain dep between SU("
+          << SUa->NodeNum << ") and SU("
+          << SUb->NodeNum << ")\n");
+  }
 }
 
 /// Create an SUnit for each real instruction, numbered in top-down topological
@@ -753,122 +832,6 @@ void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) {
   }
 }
 
-class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
-
-  /// Current total number of SUs in map.
-  unsigned NumNodes;
-
-  /// 1 for loads, 0 for stores. (see comment in SUList)
-  unsigned TrueMemOrderLatency;
-public:
-
-  Value2SUsMap(unsigned lat = 0) : NumNodes(0), TrueMemOrderLatency(lat) {}
-
-  /// To keep NumNodes up to date, insert() is used instead of
-  /// this operator w/ push_back().
-  ValueType &operator[](const SUList &Key) {
-    llvm_unreachable("Don't use. Use insert() instead."); };
-
-  /// Add SU to the SUList of V. If Map grows huge, reduce its size
-  /// by calling reduce().
-  void inline insert(SUnit *SU, ValueType V) {
-    MapVector::operator[](V).push_back(SU);
-    NumNodes++;
-  }
-
-  /// Clears the list of SUs mapped to V.
-  void inline clearList(ValueType V) {
-    iterator Itr = find(V);
-    if (Itr != end()) {
-      assert (NumNodes >= Itr->second.size());
-      NumNodes -= Itr->second.size();
-
-      Itr->second.clear();
-    }
-  }
-
-  /// Clears map from all contents.
-  void clear() {
-    MapVector::clear();
-    NumNodes = 0;
-  }
-
-  unsigned inline size() const { return NumNodes; }
-
-  /// Count the number of SUs in this map after a reduction.
-  void reComputeSize(void) {
-    NumNodes = 0;
-    for (auto &I : *this)
-      NumNodes += I.second.size();
-  }
-
-  unsigned inline getTrueMemOrderLatency() const {
-    return TrueMemOrderLatency;
-  }
-
-  void dump();
-};
-
-void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
-                                             Value2SUsMap &Val2SUsMap) {
-  for (auto &I : Val2SUsMap)
-    addChainDependencies(SU, I.second,
-                         Val2SUsMap.getTrueMemOrderLatency());
-}
-
-void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
-                                             Value2SUsMap &Val2SUsMap,
-                                             ValueType V) {
-  Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
-  if (Itr != Val2SUsMap.end())
-    addChainDependencies(SU, Itr->second,
-                         Val2SUsMap.getTrueMemOrderLatency());
-}
-
-void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
-  assert (BarrierChain != nullptr);
-
-  for (auto &I : map) {
-    SUList &sus = I.second;
-    for (auto *SU : sus)
-      SU->addPredBarrier(BarrierChain);
-  }
-  map.clear();
-}
-
-void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
-  assert (BarrierChain != nullptr);
-
-  // Go through all lists of SUs.
-  for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
-    Value2SUsMap::iterator CurrItr = I++;
-    SUList &sus = CurrItr->second;
-    SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
-    for (; SUItr != SUEE; ++SUItr) {
-      // Stop on BarrierChain or any instruction above it.
-      if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
-        break;
-
-      (*SUItr)->addPredBarrier(BarrierChain);
-    }
-
-    // Remove also the BarrierChain from list if present.
-    if (*SUItr == BarrierChain)
-      SUItr++;
-
-    // Remove all SUs that are now successors of BarrierChain.
-    if (SUItr != sus.begin())
-      sus.erase(sus.begin(), SUItr);
-  }
-
-  // Remove all entries with empty su lists.
-  map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
-    return (mapEntry.second.empty()); });
-
-  // Recompute the size of the map (NumNodes).
-  map.reComputeSize();
-}
-
 /// If RegPressure is non-null, compute register pressure as a side effect. The
 /// DAG builder is an efficient place to do it because it already visits
 /// operands.
@@ -880,9 +843,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
   const TargetSubtargetInfo &ST = MF.getSubtarget();
   bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
                                                        : ST.useAA();
-  AAForDep = UseAA ? AA : nullptr;
-
-  BarrierChain = nullptr;
+  AliasAnalysis *AAForDep = UseAA ? AA : nullptr;
 
   this->TrackLaneMasks = TrackLaneMasks;
   MISUnitMap.clear();
@@ -894,30 +855,19 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
   if (PDiffs)
     PDiffs->init(SUnits.size());
 
-  // We build scheduling units by walking a block's instruction list
-  // from bottom to top.
-
-  // Each MIs' memory operand(s) is analyzed to a list of underlying
-  // objects. The SU is then inserted in the SUList(s) mapped from
-  // that Value(s). Each Value thus gets mapped to a list of SUs
-  // depending on it, defs and uses kept separately. Two SUs are
-  // non-aliasing to each other if they depend on different Values
-  // exclusively.
-  Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
-
-  // Certain memory accesses are known to not alias any SU in Stores
-  // or Loads, and have therefore their own 'NonAlias'
-  // domain. E.g. spill / reload instructions never alias LLVM I/R
-  // Values.
-  // It is assumed that this type of memory accesses always
-  // have a proper memory operand modelling, and are therefore never
-  // unanalyzable. This means they are non aliasing against all nodes
-  // in Stores and Loads, including the unanalyzable ones.
-  Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
-
-  // Always reduce a huge region with half of the elements, except
-  // when user sets this number explicitly.
-  if (ReductionSize.getNumOccurrences() == 0)
-    ReductionSize = (HugeRegion / 2);
+  // We build scheduling units by walking a block's instruction list from bottom
+  // to top.
+
+  // Remember where a generic side-effecting instruction is as we proceed.
+  SUnit *BarrierChain = nullptr, *AliasChain = nullptr;
+
+  // Memory references to specific known memory locations are tracked
+  // so that they can be given more precise dependencies. We track
+  // separately the known memory locations that may alias and those
+  // that are known not to alias.
+  MapVector<ValueType, std::vector<SUnit *> > AliasMemDefs, NonAliasMemDefs;
+  MapVector<ValueType, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
+  std::set<SUnit *> RejectMemNodes;
 
   // Remove any stale debug info; sometimes BuildSchedGraph is called again
   // without emitting the info from the previous call.
@@ -1012,123 +962,221 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
       ExitSU.addPred(Dep);
     }
 
-    // Add memory dependencies (Note: isStoreToStackSlot and
-    // isLoadFromStackSlot are not usable after stack slots are lowered to
-    // actual addresses).
-
-    // This is a barrier event that acts as a pivotal node in the DAG.
+    // Add chain dependencies.
+    // Chain dependencies used to enforce memory order should have
+    // latency of 0 (except for true dependency of Store followed by
+    // aliased Load... we estimate that with a single cycle of latency
+    // assuming the hardware will bypass)
+    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
+    // after stack slots are lowered to actual addresses.
+    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
+    // produce more precise dependence information.
+    unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
     if (isGlobalMemoryObject(AA, MI)) {
-
-      // Become the barrier chain.
+      // Be conservative with these and add dependencies on all memory
+      // references, even those that are known to not alias.
+      for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
+             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
+        for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
+          I->second[i]->addPred(SDep(SU, SDep::Barrier));
+        }
+      }
+      for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
+             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
+        for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
+          SDep Dep(SU, SDep::Barrier);
+          Dep.setLatency(TrueMemOrderLatency);
+          I->second[i]->addPred(Dep);
+        }
+      }
+      // Add SU to the barrier chain.
       if (BarrierChain)
-        BarrierChain->addPredBarrier(SU);
+        BarrierChain->addPred(SDep(SU, SDep::Barrier));
       BarrierChain = SU;
+      // This is a barrier event that acts as a pivotal node in the DAG,
+      // so it is safe to clear list of exposed nodes.
+      adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
+                      TrueMemOrderLatency);
+      RejectMemNodes.clear();
+      NonAliasMemDefs.clear();
+      NonAliasMemUses.clear();
+
+      // fall-through
+    new_alias_chain:
+      // Chain all possibly aliasing memory references through SU.
+      if (AliasChain) {
+        unsigned ChainLatency = 0;
+        if (AliasChain->getInstr()->mayLoad())
+          ChainLatency = TrueMemOrderLatency;
+        addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
+                           RejectMemNodes, ChainLatency);
+      }
+      AliasChain = SU;
+      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+        addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+                           PendingLoads[k], RejectMemNodes,
+                           TrueMemOrderLatency);
+      for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
+             AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) {
+        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+          addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+                             I->second[i], RejectMemNodes);
+      }
+      for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
+             AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
+        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+          addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+                             I->second[i], RejectMemNodes, TrueMemOrderLatency);
+      }
+      // This call must come after calls to addChainDependency() since it
+      // consumes the 'RejectMemNodes' list that addChainDependency() possibly
+      // adds to.
+      adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
+                      TrueMemOrderLatency);
+      PendingLoads.clear();
+      AliasMemDefs.clear();
+      AliasMemUses.clear();
+    } else if (MI->mayStore()) {
+      // Add dependence on barrier chain, if needed.
+      // There is no point to check aliasing on barrier event. Even if
+      // SU and barrier _could_ be reordered, they should not. In addition,
+      // we have lost all RejectMemNodes below barrier.
+      if (BarrierChain)
+        BarrierChain->addPred(SDep(SU, SDep::Barrier));
 
-      DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
-            << BarrierChain->NodeNum << ").\n";);
-
-      // Add dependencies against everything below it and clear maps.
-      addBarrierChain(Stores);
-      addBarrierChain(Loads);
-      addBarrierChain(NonAliasStores);
-      addBarrierChain(NonAliasLoads);
-
-      continue;
-    }
-
-    // If it's not a store or a variant load, we're done.
-    if (!MI->mayStore() && !(MI->mayLoad() && !MI->isInvariantLoad(AA)))
-      continue;
-
-    // Always add dependency edge to BarrierChain if present.
-    if (BarrierChain)
-      BarrierChain->addPredBarrier(SU);
-
-    // Find the underlying objects for MI. The Objs vector is either
-    // empty, or filled with the Values of memory locations which this
-    // SU depends on. An empty vector means the memory location is
-    // unknown, and may alias anything except NonAlias nodes.
-    UnderlyingObjectsVector Objs;
-    getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout());
+      UnderlyingObjectsVector Objs;
+      getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout());
 
-    if (MI->mayStore()) {
       if (Objs.empty()) {
-        // An unknown store depends on all stores and loads.
-        addChainDependencies(SU, Stores);
-        addChainDependencies(SU, NonAliasStores);
-        addChainDependencies(SU, Loads);
-        addChainDependencies(SU, NonAliasLoads);
-
-        // If we're not using AA, clear Stores map since all stores
-        // will be chained.
-        if (!AAForDep)
-          Stores.clear();
-
-        // Map this store to 'UnknownValue'.
-        Stores.insert(SU, UnknownValue);
-        continue;
+        // Treat all other stores conservatively.
+        goto new_alias_chain;
       }
 
-      // Add precise dependencies against all previously seen memory
-      // accesses mapped to the same Value(s).
-      for (auto &underlObj : Objs) {
-        ValueType V = underlObj.getPointer();
-        bool ThisMayAlias = underlObj.getInt();
-
-        Value2SUsMap &stores_ = (ThisMayAlias ?
-          Stores : NonAliasStores);
-
-        // Add dependencies to previous stores and loads mapped to V.
-        addChainDependencies(SU, stores_, V);
-        addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
-
-        // If we're not using AA, then we only need one store per object.
-        if (!AAForDep)
-          stores_.clearList(V);
-
-        // Map this store to V.
-        stores_.insert(SU, V);
+      bool MayAlias = false;
+      for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end();
+           K != KE; ++K) {
+        ValueType V = K->getPointer();
+        bool ThisMayAlias = K->getInt();
+        if (ThisMayAlias)
+          MayAlias = true;
+
+        // A store to a specific PseudoSourceValue. Add precise dependencies.
+        // Record the def in MemDefs, first adding a dep if there is
+        // an existing def.
+        MapVector<ValueType, std::vector<SUnit *> >::iterator I =
+          ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+        MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
+          ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+        if (I != IE) {
+          for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+            addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+                               I->second[i], RejectMemNodes, 0, true);
+
+          // If we're not using AA, then we only need one store per object.
+          if (!AAForDep)
+            I->second.clear();
+          I->second.push_back(SU);
+        } else {
+          if (ThisMayAlias) {
+            if (!AAForDep)
+              AliasMemDefs[V].clear();
+            AliasMemDefs[V].push_back(SU);
+          } else {
+            if (!AAForDep)
+              NonAliasMemDefs[V].clear();
+            NonAliasMemDefs[V].push_back(SU);
+          }
+        }
+        // Handle the uses in MemUses, if there are any.
+        MapVector<ValueType, std::vector<SUnit *> >::iterator J =
+          ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
+        MapVector<ValueType, std::vector<SUnit *> >::iterator JE =
+          ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
+        if (J != JE) {
+          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
+            addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+                               J->second[i], RejectMemNodes,
+                               TrueMemOrderLatency, true);
+          J->second.clear();
+        }
       }
 
-      // The store may have dependencies to unanalyzable loads and
-      // stores.
-      addChainDependencies(SU, Loads, UnknownValue);
-      addChainDependencies(SU, Stores, UnknownValue);
-    }
-    else { // SU is a load.
-      if (Objs.empty()) {
-        // An unknown load depends on all stores.
-        addChainDependencies(SU, Stores);
-        addChainDependencies(SU, NonAliasStores);
-
-        Loads.insert(SU, UnknownValue);
-        continue;
+      if (MayAlias) {
+        // Add dependencies from all the PendingLoads, i.e. loads
+        // with no underlying object.
+        for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+          addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+                             PendingLoads[k], RejectMemNodes,
+                             TrueMemOrderLatency);
+        // Add dependence on alias chain, if needed.
+        if (AliasChain)
+          addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
+                             RejectMemNodes);
       }
+      // This call must come after calls to addChainDependency() since it
+      // consumes the 'RejectMemNodes' list that addChainDependency() possibly
+      // adds to.
+      adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
+                      TrueMemOrderLatency);
+    } else if (MI->mayLoad()) {
+      bool MayAlias = true;
+      if (MI->isInvariantLoad(AA)) {
+        // Invariant load, no chain dependencies needed!
+      } else {
+        UnderlyingObjectsVector Objs;
+        getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout());
+
+        if (Objs.empty()) {
+          // A load with no underlying object. Depend on all
+          // potentially aliasing stores.
+          for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
+                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
+            for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+              addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+                                 I->second[i], RejectMemNodes);
+
+          PendingLoads.push_back(SU);
+          MayAlias = true;
+        } else {
+          MayAlias = false;
+        }
 
-      for (auto &underlObj : Objs) {
-        ValueType V = underlObj.getPointer();
-        bool ThisMayAlias = underlObj.getInt();
-
-        // Add precise dependencies against all previously seen stores
-        // mapping to the same Value(s).
-        addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
-
-        // Map this load to V.
-        (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
+        for (UnderlyingObjectsVector::iterator
+               J = Objs.begin(), JE = Objs.end(); J != JE; ++J) {
+          ValueType V = J->getPointer();
+          bool ThisMayAlias = J->getInt();
+
+          if (ThisMayAlias)
+            MayAlias = true;
+
+          // A load from a specific PseudoSourceValue. Add precise dependencies.
+          MapVector<ValueType, std::vector<SUnit *> >::iterator I =
+            ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+          MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
+            ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+          if (I != IE)
+            for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+              addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+                                 I->second[i], RejectMemNodes, 0, true);
+          if (ThisMayAlias)
+            AliasMemUses[V].push_back(SU);
+          else
+            NonAliasMemUses[V].push_back(SU);
+        }
+        // Add dependencies on alias and barrier chains, if needed.
+        if (MayAlias && AliasChain)
+          addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
+                             RejectMemNodes);
+        if (MayAlias)
+          // This call must come after calls to addChainDependency() since it
+          // consumes the 'RejectMemNodes' list that addChainDependency()
+          // possibly adds to.
+          adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU,
+                          RejectMemNodes, /*Latency=*/0);
+        if (BarrierChain)
+          BarrierChain->addPred(SDep(SU, SDep::Barrier));
       }
 
-      // The load may have dependencies to unanalyzable stores.
-      addChainDependencies(SU, Stores, UnknownValue);
-    }
-
-    // Reduce maps if they grow huge.
-    if (Stores.size() + Loads.size() >= HugeRegion) {
-      DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
-      reduceHugeMemNodeMaps(Stores, Loads, ReductionSize);
-    }
-    if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
-      DEBUG(dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
-      reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, ReductionSize);
-    }
   }
 }
-
   if (DbgMI)
     FirstDbgValue = DbgMI;
 
@@ -1136,84 +1184,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
   Uses.clear();
   CurrentVRegDefs.clear();
   CurrentVRegUses.clear();
-}
-
-raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
-  PSV->printCustom(OS);
-  return OS;
-}
-
-void ScheduleDAGInstrs::Value2SUsMap::dump() {
-  for (auto &Itr : *this) {
-    if (Itr.first.is<const Value*>()) {
-      const Value *V = Itr.first.get<const Value*>();
-      if (isa<UndefValue>(V))
-        dbgs() << "Unknown";
-      else
-        V->printAsOperand(dbgs());
-    }
-    else if (Itr.first.is<const PseudoSourceValue*>())
-      dbgs() << Itr.first.get<const PseudoSourceValue*>();
-    else
-      llvm_unreachable("Unknown Value type.");
-
-    dbgs() << " : ";
-    dumpSUList(Itr.second);
-  }
-}
-
-/// Reduce maps in FIFO order, by N SUs. This is better than turning
-/// every Nth memory SU into BarrierChain in buildSchedGraph(), since
-/// it avoids unnecessary edges between seen SUs above the new
-/// BarrierChain, and those below it.
-void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
-                                              Value2SUsMap &loads, unsigned N) {
-  DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n";
-        stores.dump();
-        dbgs() << "Loading SUnits:\n";
-        loads.dump());
-
-  // Insert all SU's NodeNums into a vector and sort it.
-  std::vector<unsigned> NodeNums;
-  NodeNums.reserve(stores.size() + loads.size());
-  for (auto &I : stores)
-    for (auto *SU : I.second)
-      NodeNums.push_back(SU->NodeNum);
-  for (auto &I : loads)
-    for (auto *SU : I.second)
-      NodeNums.push_back(SU->NodeNum);
-  std::sort(NodeNums.begin(), NodeNums.end());
-
-  // The N last elements in NodeNums will be removed, and the SU with
-  // the lowest NodeNum of them will become the new BarrierChain to
-  // let the not yet seen SUs have a dependency to the removed SUs.
-  assert (N <= NodeNums.size());
-  SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
-  if (BarrierChain) {
-    // The aliasing and non-aliasing maps reduce independently of each
-    // other, but share a common BarrierChain. Check if the
-    // newBarrierChain is above the former one. If it is not, it may
-    // introduce a loop to use newBarrierChain, so keep the old one.
-    if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
-      BarrierChain->addPredBarrier(newBarrierChain);
-      BarrierChain = newBarrierChain;
-      DEBUG(dbgs() << "Inserting new barrier chain: SU("
-            << BarrierChain->NodeNum << ").\n";);
-    }
-    else
-      DEBUG(dbgs() << "Keeping old barrier chain: SU("
-            << BarrierChain->NodeNum << ").\n";);
-  }
-  else
-    BarrierChain = newBarrierChain;
-
-  insertBarrierChain(stores);
-  insertBarrierChain(loads);
-
-  DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n";
-        stores.dump();
-        dbgs() << "Loading SUnits:\n";
-        loads.dump());
+  PendingLoads.clear();
 }
 
 /// \brief Initialize register live-range state for updating kills.
-- 
2.11.0