From 2e7be612d5d0eb42ee3ae08194dbb03b750cc6bf Mon Sep 17 00:00:00 2001 From: David Goodwin Date: Mon, 26 Oct 2009 16:59:04 +0000 Subject: [PATCH] Break anti-dependence breaking out into its own class. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@85127 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/AntiDepBreaker.h | 56 +++ lib/CodeGen/CMakeLists.txt | 1 + lib/CodeGen/CriticalAntiDepBreaker.cpp | 539 +++++++++++++++++++++++++ lib/CodeGen/CriticalAntiDepBreaker.h | 95 +++++ lib/CodeGen/PostRASchedulerList.cpp | 585 ++-------------------------- lib/Target/ARM/ARMSubtarget.h | 2 +- test/CodeGen/X86/break-anti-dependencies.ll | 4 +- 7 files changed, 737 insertions(+), 545 deletions(-) create mode 100644 include/llvm/CodeGen/AntiDepBreaker.h create mode 100644 lib/CodeGen/CriticalAntiDepBreaker.cpp create mode 100644 lib/CodeGen/CriticalAntiDepBreaker.h diff --git a/include/llvm/CodeGen/AntiDepBreaker.h b/include/llvm/CodeGen/AntiDepBreaker.h new file mode 100644 index 00000000000..31fdb24ec44 --- /dev/null +++ b/include/llvm/CodeGen/AntiDepBreaker.h @@ -0,0 +1,56 @@ +//=- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AntiDepBreaker class, which implements +// anti-dependence breaking heuristics for post-register-allocation scheduling. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_ANTIDEPBREAKER_H +#define LLVM_CODEGEN_ANTIDEPBREAKER_H + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Target/TargetRegisterInfo.h" + +namespace llvm { + +/// AntiDepBreaker - This class works into conjunction with the +/// post-RA scheduler to rename registers to break register +/// anti-dependencies. +class AntiDepBreaker { +public: + /// Start - Initialize anti-dep breaking for a new basic block. + virtual void StartBlock(MachineBasicBlock *BB) =0; + + /// BreakAntiDependencies - Identifiy anti-dependencies within a + /// basic-block region and break them by renaming registers. Return + /// the number of anti-dependencies broken. + /// + virtual unsigned BreakAntiDependencies(std::vector& SUnits, + MachineBasicBlock::iterator& Begin, + MachineBasicBlock::iterator& End, + unsigned InsertPosIndex) =0; + + /// Observe - Update liveness information to account for the current + /// instruction, which will not be scheduled. + /// + virtual void Observe(MachineInstr *MI, unsigned Count, + unsigned InsertPosIndex) =0; + + /// Finish - Finish anti-dep breaking for a basic block. + virtual void FinishBlock() =0; +}; + +} + +#endif diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 713c30c7d4a..14b215ec07a 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_library(LLVMCodeGen BranchFolding.cpp CodePlacementOpt.cpp + CriticalAntiDepBreaker.cpp DeadMachineInstructionElim.cpp DwarfEHPrepare.cpp ELFCodeEmitter.cpp diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp new file mode 100644 index 00000000000..ceb48d78457 --- /dev/null +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -0,0 +1,539 @@ +//===----- CriticalAntiDepBreaker.cpp - Anti-dep breaker -------- ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CriticalAntiDepBreaker class, which +// implements register anti-dependence breaking along a blocks +// critical path during post-RA scheduler. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "critical-antidep" +#include "CriticalAntiDepBreaker.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +CriticalAntiDepBreaker:: +CriticalAntiDepBreaker(MachineFunction& MFi) : + AntiDepBreaker(), MF(MFi), + MRI(MF.getRegInfo()), + TRI(MF.getTarget().getRegisterInfo()), + AllocatableSet(TRI->getAllocatableSet(MF)) +{ +} + +CriticalAntiDepBreaker::~CriticalAntiDepBreaker() { +} + +void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { + // Clear out the register class data. + std::fill(Classes, array_endof(Classes), + static_cast(0)); + + // Initialize the indices to indicate that no registers are live. + std::fill(KillIndices, array_endof(KillIndices), ~0u); + std::fill(DefIndices, array_endof(DefIndices), BB->size()); + + // Clear "do not change" set. + KeepRegs.clear(); + + bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn()); + + // Determine the live-out physregs for this block. + if (IsReturnBlock) { + // In a return block, examine the function live-out regs. + for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), + E = MRI.liveout_end(); I != E; ++I) { + unsigned Reg = *I; + Classes[Reg] = reinterpret_cast(-1); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + // Repeat, for all aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + Classes[AliasReg] = reinterpret_cast(-1); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; + } + } + } else { + // In a non-return block, examine the live-in regs of all successors. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + E = (*SI)->livein_end(); I != E; ++I) { + unsigned Reg = *I; + Classes[Reg] = reinterpret_cast(-1); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + // Repeat, for all aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + Classes[AliasReg] = reinterpret_cast(-1); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; + } + } + } + + // Mark live-out callee-saved registers. In a return block this is + // all callee-saved registers. In non-return this is any + // callee-saved register that is not saved in the prolog. + const MachineFrameInfo *MFI = MF.getFrameInfo(); + BitVector Pristine = MFI->getPristineRegs(BB); + for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) { + unsigned Reg = *I; + if (!IsReturnBlock && !Pristine.test(Reg)) continue; + Classes[Reg] = reinterpret_cast(-1); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + // Repeat, for all aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + Classes[AliasReg] = reinterpret_cast(-1); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; + } + } +} + +void CriticalAntiDepBreaker::FinishBlock() { + RegRefs.clear(); + KeepRegs.clear(); +} + +void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, + unsigned InsertPosIndex) { + assert(Count < InsertPosIndex && "Instruction index out of expected range!"); + + // Any register which was defined within the previous scheduling region + // may have been rescheduled and its lifetime may overlap with registers + // in ways not reflected in our current liveness state. For each such + // register, adjust the liveness state to be conservatively correct. + for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg) + if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) { + assert(KillIndices[Reg] == ~0u && "Clobbered register is live!"); + // Mark this register to be non-renamable. + Classes[Reg] = reinterpret_cast(-1); + // Move the def index to the end of the previous region, to reflect + // that the def could theoretically have been scheduled at the end. + DefIndices[Reg] = InsertPosIndex; + } + + PrescanInstruction(MI); + ScanInstruction(MI, Count); +} + +/// CriticalPathStep - Return the next SUnit after SU on the bottom-up +/// critical path. +static SDep *CriticalPathStep(SUnit *SU) { + SDep *Next = 0; + unsigned NextDepth = 0; + // Find the predecessor edge with the greatest depth. + for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + P != PE; ++P) { + SUnit *PredSU = P->getSUnit(); + unsigned PredLatency = P->getLatency(); + unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; + // In the case of a latency tie, prefer an anti-dependency edge over + // other types of edges. + if (NextDepth < PredTotalLatency || + (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) { + NextDepth = PredTotalLatency; + Next = &*P; + } + } + return Next; +} + +void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { + // Scan the register operands for this instruction and update + // Classes and RegRefs. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + const TargetRegisterClass *NewRC = 0; + + if (i < MI->getDesc().getNumOperands()) + NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI); + + // For now, only allow the register to be changed if its register + // class is consistent across all uses. + if (!Classes[Reg] && NewRC) + Classes[Reg] = NewRC; + else if (!NewRC || Classes[Reg] != NewRC) + Classes[Reg] = reinterpret_cast(-1); + + // Now check for aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + // If an alias of the reg is used during the live range, give up. + // Note that this allows us to skip checking if AntiDepReg + // overlaps with any of the aliases, among other things. + unsigned AliasReg = *Alias; + if (Classes[AliasReg]) { + Classes[AliasReg] = reinterpret_cast(-1); + Classes[Reg] = reinterpret_cast(-1); + } + } + + // If we're still willing to consider this register, note the reference. + if (Classes[Reg] != reinterpret_cast(-1)) + RegRefs.insert(std::make_pair(Reg, &MO)); + + // It's not safe to change register allocation for source operands of + // that have special allocation requirements. + if (MO.isUse() && MI->getDesc().hasExtraSrcRegAllocReq()) { + if (KeepRegs.insert(Reg)) { + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) + KeepRegs.insert(*Subreg); + } + } + } +} + +void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, + unsigned Count) { + // Update liveness. + // Proceding upwards, registers that are defed but not used in this + // instruction are now dead. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (!MO.isDef()) continue; + // Ignore two-addr defs. + if (MI->isRegTiedToUseOperand(i)) continue; + + DefIndices[Reg] = Count; + KillIndices[Reg] = ~0u; + assert(((KillIndices[Reg] == ~0u) != + (DefIndices[Reg] == ~0u)) && + "Kill and Def maps aren't consistent for Reg!"); + KeepRegs.erase(Reg); + Classes[Reg] = 0; + RegRefs.erase(Reg); + // Repeat, for all subregs. + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) { + unsigned SubregReg = *Subreg; + DefIndices[SubregReg] = Count; + KillIndices[SubregReg] = ~0u; + KeepRegs.erase(SubregReg); + Classes[SubregReg] = 0; + RegRefs.erase(SubregReg); + } + // Conservatively mark super-registers as unusable. + for (const unsigned *Super = TRI->getSuperRegisters(Reg); + *Super; ++Super) { + unsigned SuperReg = *Super; + Classes[SuperReg] = reinterpret_cast(-1); + } + } + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (!MO.isUse()) continue; + + const TargetRegisterClass *NewRC = 0; + if (i < MI->getDesc().getNumOperands()) + NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI); + + // For now, only allow the register to be changed if its register + // class is consistent across all uses. + if (!Classes[Reg] && NewRC) + Classes[Reg] = NewRC; + else if (!NewRC || Classes[Reg] != NewRC) + Classes[Reg] = reinterpret_cast(-1); + + RegRefs.insert(std::make_pair(Reg, &MO)); + + // It wasn't previously live but now it is, this is a kill. + if (KillIndices[Reg] == ~0u) { + KillIndices[Reg] = Count; + DefIndices[Reg] = ~0u; + assert(((KillIndices[Reg] == ~0u) != + (DefIndices[Reg] == ~0u)) && + "Kill and Def maps aren't consistent for Reg!"); + } + // Repeat, for all aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + if (KillIndices[AliasReg] == ~0u) { + KillIndices[AliasReg] = Count; + DefIndices[AliasReg] = ~0u; + } + } + } +} + +unsigned +CriticalAntiDepBreaker::findSuitableFreeRegister(unsigned AntiDepReg, + unsigned LastNewReg, + const TargetRegisterClass *RC) { + for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF), + RE = RC->allocation_order_end(MF); R != RE; ++R) { + unsigned NewReg = *R; + // Don't replace a register with itself. + if (NewReg == AntiDepReg) continue; + // Don't replace a register with one that was recently used to repair + // an anti-dependence with this AntiDepReg, because that would + // re-introduce that anti-dependence. + if (NewReg == LastNewReg) continue; + // If NewReg is dead and NewReg's most recent def is not before + // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg. + assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) && + "Kill and Def maps aren't consistent for AntiDepReg!"); + assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) && + "Kill and Def maps aren't consistent for NewReg!"); + if (KillIndices[NewReg] != ~0u || + Classes[NewReg] == reinterpret_cast(-1) || + KillIndices[AntiDepReg] > DefIndices[NewReg]) + continue; + return NewReg; + } + + // No registers are free and available! + return 0; +} + +unsigned CriticalAntiDepBreaker:: +BreakAntiDependencies(std::vector& SUnits, + MachineBasicBlock::iterator& Begin, + MachineBasicBlock::iterator& End, + unsigned InsertPosIndex) { + // The code below assumes that there is at least one instruction, + // so just duck out immediately if the block is empty. + if (SUnits.empty()) return 0; + + // Find the node at the bottom of the critical path. + SUnit *Max = 0; + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + SUnit *SU = &SUnits[i]; + if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) + Max = SU; + } + +#ifndef NDEBUG + { + DEBUG(errs() << "Critical path has total latency " + << (Max->getDepth() + Max->Latency) << "\n"); + DEBUG(errs() << "Available regs:"); + for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { + if (KillIndices[Reg] == ~0u) + DEBUG(errs() << " " << TRI->getName(Reg)); + } + DEBUG(errs() << '\n'); + } +#endif + + // Track progress along the critical path through the SUnit graph as we walk + // the instructions. + SUnit *CriticalPathSU = Max; + MachineInstr *CriticalPathMI = CriticalPathSU->getInstr(); + + // Consider this pattern: + // A = ... + // ... = A + // A = ... + // ... = A + // A = ... + // ... = A + // A = ... + // ... = A + // There are three anti-dependencies here, and without special care, + // we'd break all of them using the same register: + // A = ... + // ... = A + // B = ... + // ... = B + // B = ... + // ... = B + // B = ... + // ... = B + // because at each anti-dependence, B is the first register that + // isn't A which is free. This re-introduces anti-dependencies + // at all but one of the original anti-dependencies that we were + // trying to break. To avoid this, keep track of the most recent + // register that each register was replaced with, avoid + // using it to repair an anti-dependence on the same register. + // This lets us produce this: + // A = ... + // ... = A + // B = ... + // ... = B + // C = ... + // ... = C + // B = ... + // ... = B + // This still has an anti-dependence on B, but at least it isn't on the + // original critical path. + // + // TODO: If we tracked more than one register here, we could potentially + // fix that remaining critical edge too. This is a little more involved, + // because unlike the most recent register, less recent registers should + // still be considered, though only if no other registers are available. + unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {}; + + // Attempt to break anti-dependence edges on the critical path. Walk the + // instructions from the bottom up, tracking information about liveness + // as we go to help determine which registers are available. + unsigned Broken = 0; + unsigned Count = InsertPosIndex - 1; + for (MachineBasicBlock::iterator I = End, E = Begin; + I != E; --Count) { + MachineInstr *MI = --I; + + // Check if this instruction has a dependence on the critical path that + // is an anti-dependence that we may be able to break. If it is, set + // AntiDepReg to the non-zero register associated with the anti-dependence. + // + // We limit our attention to the critical path as a heuristic to avoid + // breaking anti-dependence edges that aren't going to significantly + // impact the overall schedule. There are a limited number of registers + // and we want to save them for the important edges. + // + // TODO: Instructions with multiple defs could have multiple + // anti-dependencies. The current code here only knows how to break one + // edge per instruction. Note that we'd have to be able to break all of + // the anti-dependencies in an instruction in order to be effective. + unsigned AntiDepReg = 0; + if (MI == CriticalPathMI) { + if (SDep *Edge = CriticalPathStep(CriticalPathSU)) { + SUnit *NextSU = Edge->getSUnit(); + + // Only consider anti-dependence edges. + if (Edge->getKind() == SDep::Anti) { + AntiDepReg = Edge->getReg(); + assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); + if (!AllocatableSet.test(AntiDepReg)) + // Don't break anti-dependencies on non-allocatable registers. + AntiDepReg = 0; + else if (KeepRegs.count(AntiDepReg)) + // Don't break anti-dependencies if an use down below requires + // this exact register. + AntiDepReg = 0; + else { + // If the SUnit has other dependencies on the SUnit that it + // anti-depends on, don't bother breaking the anti-dependency + // since those edges would prevent such units from being + // scheduled past each other regardless. + // + // Also, if there are dependencies on other SUnits with the + // same register as the anti-dependency, don't attempt to + // break it. + for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(), + PE = CriticalPathSU->Preds.end(); P != PE; ++P) + if (P->getSUnit() == NextSU ? + (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) : + (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) { + AntiDepReg = 0; + break; + } + } + } + CriticalPathSU = NextSU; + CriticalPathMI = CriticalPathSU->getInstr(); + } else { + // We've reached the end of the critical path. + CriticalPathSU = 0; + CriticalPathMI = 0; + } + } + + PrescanInstruction(MI); + + if (MI->getDesc().hasExtraDefRegAllocReq()) + // If this instruction's defs have special allocation requirement, don't + // break this anti-dependency. + AntiDepReg = 0; + else if (AntiDepReg) { + // If this instruction has a use of AntiDepReg, breaking it + // is invalid. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (MO.isUse() && AntiDepReg == Reg) { + AntiDepReg = 0; + break; + } + } + } + + // Determine AntiDepReg's register class, if it is live and is + // consistently used within a single class. + const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0; + assert((AntiDepReg == 0 || RC != NULL) && + "Register should be live if it's causing an anti-dependence!"); + if (RC == reinterpret_cast(-1)) + AntiDepReg = 0; + + // Look for a suitable register to use to break the anti-depenence. + // + // TODO: Instead of picking the first free register, consider which might + // be the best. + if (AntiDepReg != 0) { + if (unsigned NewReg = findSuitableFreeRegister(AntiDepReg, + LastNewReg[AntiDepReg], + RC)) { + DEBUG(errs() << "Breaking anti-dependence edge on " + << TRI->getName(AntiDepReg) + << " with " << RegRefs.count(AntiDepReg) << " references" + << " using " << TRI->getName(NewReg) << "!\n"); + + // Update the references to the old register to refer to the new + // register. + std::pair::iterator, + std::multimap::iterator> + Range = RegRefs.equal_range(AntiDepReg); + for (std::multimap::iterator + Q = Range.first, QE = Range.second; Q != QE; ++Q) + Q->second->setReg(NewReg); + + // We just went back in time and modified history; the + // liveness information for the anti-depenence reg is now + // inconsistent. Set the state as if it were dead. + Classes[NewReg] = Classes[AntiDepReg]; + DefIndices[NewReg] = DefIndices[AntiDepReg]; + KillIndices[NewReg] = KillIndices[AntiDepReg]; + assert(((KillIndices[NewReg] == ~0u) != + (DefIndices[NewReg] == ~0u)) && + "Kill and Def maps aren't consistent for NewReg!"); + + Classes[AntiDepReg] = 0; + DefIndices[AntiDepReg] = KillIndices[AntiDepReg]; + KillIndices[AntiDepReg] = ~0u; + assert(((KillIndices[AntiDepReg] == ~0u) != + (DefIndices[AntiDepReg] == ~0u)) && + "Kill and Def maps aren't consistent for AntiDepReg!"); + + RegRefs.erase(AntiDepReg); + LastNewReg[AntiDepReg] = NewReg; + ++Broken; + } + } + + ScanInstruction(MI, Count); + } + + return Broken; +} diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h new file mode 100644 index 00000000000..830b29a060d --- /dev/null +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -0,0 +1,95 @@ +//=- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CriticalAntiDepBreaker class, which +// implements register anti-dependence breaking along a blocks +// critical path during post-RA scheduler. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H +#define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H + +#include "llvm/CodeGen/AntiDepBreaker.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallSet.h" + +namespace llvm { + class CriticalAntiDepBreaker : public AntiDepBreaker { + MachineFunction& MF; + MachineRegisterInfo &MRI; + const TargetRegisterInfo *TRI; + + /// AllocatableSet - The set of allocatable registers. + /// We'll be ignoring anti-dependencies on non-allocatable registers, + /// because they may not be safe to break. + const BitVector AllocatableSet; + + /// Classes - For live regs that are only used in one register class in a + /// live range, the register class. If the register is not live, the + /// corresponding value is null. If the register is live but used in + /// multiple register classes, the corresponding value is -1 casted to a + /// pointer. + const TargetRegisterClass * + Classes[TargetRegisterInfo::FirstVirtualRegister]; + + /// RegRegs - Map registers to all their references within a live range. + std::multimap RegRefs; + + /// KillIndices - The index of the most recent kill (proceding bottom-up), + /// or ~0u if the register is not live. + unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister]; + + /// DefIndices - The index of the most recent complete def (proceding bottom + /// up), or ~0u if the register is live. + unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister]; + + /// KeepRegs - A set of registers which are live and cannot be changed to + /// break anti-dependencies. + SmallSet KeepRegs; + + public: + CriticalAntiDepBreaker(MachineFunction& MFi); + ~CriticalAntiDepBreaker(); + + /// Start - Initialize anti-dep breaking for a new basic block. + void StartBlock(MachineBasicBlock *BB); + + /// BreakAntiDependencies - Identifiy anti-dependencies along the critical path + /// of the ScheduleDAG and break them by renaming registers. + /// + unsigned BreakAntiDependencies(std::vector& SUnits, + MachineBasicBlock::iterator& Begin, + MachineBasicBlock::iterator& End, + unsigned InsertPosIndex); + + /// Observe - Update liveness information to account for the current + /// instruction, which will not be scheduled. + /// + void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex); + + /// Finish - Finish anti-dep breaking for a basic block. + void FinishBlock(); + + private: + void PrescanInstruction(MachineInstr *MI); + void ScanInstruction(MachineInstr *MI, unsigned Count); + unsigned findSuitableFreeRegister(unsigned AntiDepReg, + unsigned LastNewReg, + const TargetRegisterClass *); + }; +} + +#endif diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 106de5c0646..ea66f02a4b4 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -19,6 +19,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "post-RA-sched" +#include "CriticalAntiDepBreaker.h" #include "ExactHazardRecognizer.h" #include "SimpleHazardRecognizer.h" #include "ScheduleDAGInstrs.h" @@ -40,6 +41,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/Statistic.h" #include #include @@ -47,6 +49,7 @@ using namespace llvm; STATISTIC(NumNoops, "Number of noops inserted"); STATISTIC(NumStalls, "Number of pipeline stalls"); +STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies"); // Post-RA scheduling is enabled with // TargetSubtarget.enablePostRAScheduler(). This flag can be used to @@ -55,10 +58,11 @@ static cl::opt EnablePostRAScheduler("post-RA-scheduler", cl::desc("Enable scheduling after register allocation"), cl::init(false), cl::Hidden); -static cl::opt +static cl::opt EnableAntiDepBreaking("break-anti-dependencies", - cl::desc("Break post-RA scheduling anti-dependencies"), - cl::init(true), cl::Hidden); + cl::desc("Break post-RA scheduling anti-dependencies: " + "\"critical\", \"all\", or \"none\""), + cl::init("none"), cl::Hidden); static cl::opt EnablePostRAHazardAvoidance("avoid-hazards", cl::desc("Enable exact hazard avoidance"), @@ -116,56 +120,30 @@ namespace { /// Topo - A topological ordering for SUnits. ScheduleDAGTopologicalSort Topo; - /// AllocatableSet - The set of allocatable registers. - /// We'll be ignoring anti-dependencies on non-allocatable registers, - /// because they may not be safe to break. - const BitVector AllocatableSet; - /// HazardRec - The hazard recognizer to use. ScheduleHazardRecognizer *HazardRec; + /// AntiDepBreak - Anti-dependence breaking object, or NULL if none + AntiDepBreaker *AntiDepBreak; + /// AA - AliasAnalysis for making memory reference queries. AliasAnalysis *AA; - /// AntiDepMode - Anti-dependence breaking mode - TargetSubtarget::AntiDepBreakMode AntiDepMode; - - /// Classes - For live regs that are only used in one register class in a - /// live range, the register class. If the register is not live, the - /// corresponding value is null. If the register is live but used in - /// multiple register classes, the corresponding value is -1 casted to a - /// pointer. - const TargetRegisterClass * - Classes[TargetRegisterInfo::FirstVirtualRegister]; - - /// RegRegs - Map registers to all their references within a live range. - std::multimap RegRefs; - /// KillIndices - The index of the most recent kill (proceding bottom-up), /// or ~0u if the register is not live. unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister]; - /// DefIndices - The index of the most recent complete def (proceding bottom - /// up), or ~0u if the register is live. - unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister]; - - /// KeepRegs - A set of registers which are live and cannot be changed to - /// break anti-dependencies. - SmallSet KeepRegs; - public: SchedulePostRATDList(MachineFunction &MF, const MachineLoopInfo &MLI, const MachineDominatorTree &MDT, ScheduleHazardRecognizer *HR, - AliasAnalysis *aa, - TargetSubtarget::AntiDepBreakMode adm) + AntiDepBreaker *ADB, + AliasAnalysis *aa) : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), - AllocatableSet(TRI->getAllocatableSet(MF)), - HazardRec(HR), AA(aa), AntiDepMode(adm) {} + HazardRec(HR), AntiDepBreak(ADB), AA(aa) {} ~SchedulePostRATDList() { - delete HazardRec; } /// StartBlock - Initialize register live-range state for scheduling in @@ -177,11 +155,6 @@ namespace { /// void Schedule(); - /// FixupKills - Fix register kill flags that have been made - /// invalid due to scheduling - /// - void FixupKills(MachineBasicBlock *MBB); - /// Observe - Update liveness information to account for the current /// instruction, which will not be scheduled. /// @@ -191,17 +164,16 @@ namespace { /// void FinishBlock(); + /// FixupKills - Fix register kill flags that have been made + /// invalid due to scheduling + /// + void FixupKills(MachineBasicBlock *MBB); + private: - void PrescanInstruction(MachineInstr *MI); - void ScanInstruction(MachineInstr *MI, unsigned Count); void ReleaseSucc(SUnit *SU, SDep *SuccEdge); void ReleaseSuccessors(SUnit *SU); void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); void ListScheduleTopDown(); - bool BreakAntiDependencies(); - unsigned findSuitableFreeRegister(unsigned AntiDepReg, - unsigned LastNewReg, - const TargetRegisterClass *); void StartBlockForKills(MachineBasicBlock *BB); // ToggleKillFlag - Toggle a register operand kill flag. Other @@ -250,8 +222,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { // Check for antidep breaking override... if (EnableAntiDepBreaking.getPosition() > 0) { - AntiDepMode = (EnableAntiDepBreaking) ? - TargetSubtarget::ANTIDEP_CRITICAL : TargetSubtarget::ANTIDEP_NONE; + AntiDepMode = (EnableAntiDepBreaking == "all") ? TargetSubtarget::ANTIDEP_ALL : + (EnableAntiDepBreaking == "critical") ? TargetSubtarget::ANTIDEP_CRITICAL : + TargetSubtarget::ANTIDEP_NONE; } DEBUG(errs() << "PostRAScheduler\n"); @@ -262,8 +235,12 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ? (ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) : (ScheduleHazardRecognizer *)new SimpleHazardRecognizer(); + AntiDepBreaker *ADB = + (AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ? NULL /* FIXME */ : + (AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ? + new CriticalAntiDepBreaker(Fn) : NULL; - SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, AA, AntiDepMode); + SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, ADB, AA); // Loop over all of the basic blocks for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); @@ -311,6 +288,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { Scheduler.FixupKills(MBB); } + delete HR; + delete ADB; + return true; } @@ -321,78 +301,10 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { // Call the superclass. ScheduleDAGInstrs::StartBlock(BB); - // Reset the hazard recognizer. + // Reset the hazard recognizer and anti-dep breaker. HazardRec->Reset(); - - // Clear out the register class data. - std::fill(Classes, array_endof(Classes), - static_cast(0)); - - // Initialize the indices to indicate that no registers are live. - std::fill(KillIndices, array_endof(KillIndices), ~0u); - std::fill(DefIndices, array_endof(DefIndices), BB->size()); - - // Clear "do not change" set. - KeepRegs.clear(); - - bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn()); - - // Determine the live-out physregs for this block. - if (IsReturnBlock) { - // In a return block, examine the function live-out regs. - for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), - E = MRI.liveout_end(); I != E; ++I) { - unsigned Reg = *I; - Classes[Reg] = reinterpret_cast(-1); - KillIndices[Reg] = BB->size(); - DefIndices[Reg] = ~0u; - // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - Classes[AliasReg] = reinterpret_cast(-1); - KillIndices[AliasReg] = BB->size(); - DefIndices[AliasReg] = ~0u; - } - } - } else { - // In a non-return block, examine the live-in regs of all successors. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), - E = (*SI)->livein_end(); I != E; ++I) { - unsigned Reg = *I; - Classes[Reg] = reinterpret_cast(-1); - KillIndices[Reg] = BB->size(); - DefIndices[Reg] = ~0u; - // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - Classes[AliasReg] = reinterpret_cast(-1); - KillIndices[AliasReg] = BB->size(); - DefIndices[AliasReg] = ~0u; - } - } - } - - // Mark live-out callee-saved registers. In a return block this is - // all callee-saved registers. In non-return this is any - // callee-saved register that is not saved in the prolog. - const MachineFrameInfo *MFI = MF.getFrameInfo(); - BitVector Pristine = MFI->getPristineRegs(BB); - for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) { - unsigned Reg = *I; - if (!IsReturnBlock && !Pristine.test(Reg)) continue; - Classes[Reg] = reinterpret_cast(-1); - KillIndices[Reg] = BB->size(); - DefIndices[Reg] = ~0u; - // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - Classes[AliasReg] = reinterpret_cast(-1); - KillIndices[AliasReg] = BB->size(); - DefIndices[AliasReg] = ~0u; - } - } + if (AntiDepBreak != NULL) + AntiDepBreak->StartBlock(BB); } /// Schedule - Schedule the instruction range using list scheduling. @@ -403,8 +315,11 @@ void SchedulePostRATDList::Schedule() { // Build the scheduling graph. BuildSchedGraph(AA); - if (AntiDepMode != TargetSubtarget::ANTIDEP_NONE) { - if (BreakAntiDependencies()) { + if (AntiDepBreak != NULL) { + unsigned Broken = + AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos, + InsertPosIndex); + if (Broken > 0) { // We made changes. Update the dependency graph. // Theoretically we could update the graph in place: // When a live range is changed to use a different register, remove @@ -415,6 +330,8 @@ void SchedulePostRATDList::Schedule() { EntrySU = SUnit(); ExitSU = SUnit(); BuildSchedGraph(AA); + + NumFixedAnti += Broken; } } @@ -432,436 +349,20 @@ void SchedulePostRATDList::Schedule() { /// instruction, which will not be scheduled. /// void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) { - assert(Count < InsertPosIndex && "Instruction index out of expected range!"); - - // Any register which was defined within the previous scheduling region - // may have been rescheduled and its lifetime may overlap with registers - // in ways not reflected in our current liveness state. For each such - // register, adjust the liveness state to be conservatively correct. - for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg) - if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) { - assert(KillIndices[Reg] == ~0u && "Clobbered register is live!"); - // Mark this register to be non-renamable. - Classes[Reg] = reinterpret_cast(-1); - // Move the def index to the end of the previous region, to reflect - // that the def could theoretically have been scheduled at the end. - DefIndices[Reg] = InsertPosIndex; - } - - PrescanInstruction(MI); - ScanInstruction(MI, Count); + if (AntiDepBreak != NULL) + AntiDepBreak->Observe(MI, Count, InsertPosIndex); } /// FinishBlock - Clean up register live-range state. /// void SchedulePostRATDList::FinishBlock() { - RegRefs.clear(); + if (AntiDepBreak != NULL) + AntiDepBreak->FinishBlock(); // Call the superclass. ScheduleDAGInstrs::FinishBlock(); } -/// CriticalPathStep - Return the next SUnit after SU on the bottom-up -/// critical path. -static SDep *CriticalPathStep(SUnit *SU) { - SDep *Next = 0; - unsigned NextDepth = 0; - // Find the predecessor edge with the greatest depth. - for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); - P != PE; ++P) { - SUnit *PredSU = P->getSUnit(); - unsigned PredLatency = P->getLatency(); - unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; - // In the case of a latency tie, prefer an anti-dependency edge over - // other types of edges. - if (NextDepth < PredTotalLatency || - (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) { - NextDepth = PredTotalLatency; - Next = &*P; - } - } - return Next; -} - -void SchedulePostRATDList::PrescanInstruction(MachineInstr *MI) { - // Scan the register operands for this instruction and update - // Classes and RegRefs. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) continue; - const TargetRegisterClass *NewRC = 0; - - if (i < MI->getDesc().getNumOperands()) - NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI); - - // For now, only allow the register to be changed if its register - // class is consistent across all uses. - if (!Classes[Reg] && NewRC) - Classes[Reg] = NewRC; - else if (!NewRC || Classes[Reg] != NewRC) - Classes[Reg] = reinterpret_cast(-1); - - // Now check for aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - // If an alias of the reg is used during the live range, give up. - // Note that this allows us to skip checking if AntiDepReg - // overlaps with any of the aliases, among other things. - unsigned AliasReg = *Alias; - if (Classes[AliasReg]) { - Classes[AliasReg] = reinterpret_cast(-1); - Classes[Reg] = reinterpret_cast(-1); - } - } - - // If we're still willing to consider this register, note the reference. - if (Classes[Reg] != reinterpret_cast(-1)) - RegRefs.insert(std::make_pair(Reg, &MO)); - - // It's not safe to change register allocation for source operands of - // that have special allocation requirements. - if (MO.isUse() && MI->getDesc().hasExtraSrcRegAllocReq()) { - if (KeepRegs.insert(Reg)) { - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) - KeepRegs.insert(*Subreg); - } - } - } -} - -void SchedulePostRATDList::ScanInstruction(MachineInstr *MI, - unsigned Count) { - // Update liveness. - // Proceding upwards, registers that are defed but not used in this - // instruction are now dead. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) continue; - if (!MO.isDef()) continue; - // Ignore two-addr defs. - if (MI->isRegTiedToUseOperand(i)) continue; - - DefIndices[Reg] = Count; - KillIndices[Reg] = ~0u; - assert(((KillIndices[Reg] == ~0u) != - (DefIndices[Reg] == ~0u)) && - "Kill and Def maps aren't consistent for Reg!"); - KeepRegs.erase(Reg); - Classes[Reg] = 0; - RegRefs.erase(Reg); - // Repeat, for all subregs. - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - unsigned SubregReg = *Subreg; - DefIndices[SubregReg] = Count; - KillIndices[SubregReg] = ~0u; - KeepRegs.erase(SubregReg); - Classes[SubregReg] = 0; - RegRefs.erase(SubregReg); - } - // Conservatively mark super-registers as unusable. - for (const unsigned *Super = TRI->getSuperRegisters(Reg); - *Super; ++Super) { - unsigned SuperReg = *Super; - Classes[SuperReg] = reinterpret_cast(-1); - } - } - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) continue; - if (!MO.isUse()) continue; - - const TargetRegisterClass *NewRC = 0; - if (i < MI->getDesc().getNumOperands()) - NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI); - - // For now, only allow the register to be changed if its register - // class is consistent across all uses. - if (!Classes[Reg] && NewRC) - Classes[Reg] = NewRC; - else if (!NewRC || Classes[Reg] != NewRC) - Classes[Reg] = reinterpret_cast(-1); - - RegRefs.insert(std::make_pair(Reg, &MO)); - - // It wasn't previously live but now it is, this is a kill. - if (KillIndices[Reg] == ~0u) { - KillIndices[Reg] = Count; - DefIndices[Reg] = ~0u; - assert(((KillIndices[Reg] == ~0u) != - (DefIndices[Reg] == ~0u)) && - "Kill and Def maps aren't consistent for Reg!"); - } - // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - if (KillIndices[AliasReg] == ~0u) { - KillIndices[AliasReg] = Count; - DefIndices[AliasReg] = ~0u; - } - } - } -} - -unsigned -SchedulePostRATDList::findSuitableFreeRegister(unsigned AntiDepReg, - unsigned LastNewReg, - const TargetRegisterClass *RC) { - for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF), - RE = RC->allocation_order_end(MF); R != RE; ++R) { - unsigned NewReg = *R; - // Don't replace a register with itself. - if (NewReg == AntiDepReg) continue; - // Don't replace a register with one that was recently used to repair - // an anti-dependence with this AntiDepReg, because that would - // re-introduce that anti-dependence. - if (NewReg == LastNewReg) continue; - // If NewReg is dead and NewReg's most recent def is not before - // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg. - assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) && - "Kill and Def maps aren't consistent for AntiDepReg!"); - assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) && - "Kill and Def maps aren't consistent for NewReg!"); - if (KillIndices[NewReg] != ~0u || - Classes[NewReg] == reinterpret_cast(-1) || - KillIndices[AntiDepReg] > DefIndices[NewReg]) - continue; - return NewReg; - } - - // No registers are free and available! - return 0; -} - -/// BreakAntiDependencies - Identifiy anti-dependencies along the critical path -/// of the ScheduleDAG and break them by renaming registers. -/// -bool SchedulePostRATDList::BreakAntiDependencies() { - // The code below assumes that there is at least one instruction, - // so just duck out immediately if the block is empty. - if (SUnits.empty()) return false; - - // Find the node at the bottom of the critical path. - SUnit *Max = 0; - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - SUnit *SU = &SUnits[i]; - if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) - Max = SU; - } - -#ifndef NDEBUG - { - DEBUG(errs() << "Critical path has total latency " - << (Max->getDepth() + Max->Latency) << "\n"); - DEBUG(errs() << "Available regs:"); - for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { - if (KillIndices[Reg] == ~0u) - DEBUG(errs() << " " << TRI->getName(Reg)); - } - DEBUG(errs() << '\n'); - } -#endif - - // Track progress along the critical path through the SUnit graph as we walk - // the instructions. - SUnit *CriticalPathSU = Max; - MachineInstr *CriticalPathMI = CriticalPathSU->getInstr(); - - // Consider this pattern: - // A = ... - // ... = A - // A = ... - // ... = A - // A = ... - // ... = A - // A = ... - // ... = A - // There are three anti-dependencies here, and without special care, - // we'd break all of them using the same register: - // A = ... - // ... = A - // B = ... - // ... = B - // B = ... - // ... = B - // B = ... - // ... = B - // because at each anti-dependence, B is the first register that - // isn't A which is free. This re-introduces anti-dependencies - // at all but one of the original anti-dependencies that we were - // trying to break. To avoid this, keep track of the most recent - // register that each register was replaced with, avoid - // using it to repair an anti-dependence on the same register. - // This lets us produce this: - // A = ... - // ... = A - // B = ... - // ... = B - // C = ... - // ... = C - // B = ... - // ... = B - // This still has an anti-dependence on B, but at least it isn't on the - // original critical path. - // - // TODO: If we tracked more than one register here, we could potentially - // fix that remaining critical edge too. This is a little more involved, - // because unlike the most recent register, less recent registers should - // still be considered, though only if no other registers are available. - unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {}; - - // Attempt to break anti-dependence edges on the critical path. Walk the - // instructions from the bottom up, tracking information about liveness - // as we go to help determine which registers are available. - bool Changed = false; - unsigned Count = InsertPosIndex - 1; - for (MachineBasicBlock::iterator I = InsertPos, E = Begin; - I != E; --Count) { - MachineInstr *MI = --I; - - // Check if this instruction has a dependence on the critical path that - // is an anti-dependence that we may be able to break. If it is, set - // AntiDepReg to the non-zero register associated with the anti-dependence. - // - // We limit our attention to the critical path as a heuristic to avoid - // breaking anti-dependence edges that aren't going to significantly - // impact the overall schedule. There are a limited number of registers - // and we want to save them for the important edges. - // - // TODO: Instructions with multiple defs could have multiple - // anti-dependencies. The current code here only knows how to break one - // edge per instruction. Note that we'd have to be able to break all of - // the anti-dependencies in an instruction in order to be effective. - unsigned AntiDepReg = 0; - if (MI == CriticalPathMI) { - if (SDep *Edge = CriticalPathStep(CriticalPathSU)) { - SUnit *NextSU = Edge->getSUnit(); - - // Only consider anti-dependence edges. - if (Edge->getKind() == SDep::Anti) { - AntiDepReg = Edge->getReg(); - assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); - if (!AllocatableSet.test(AntiDepReg)) - // Don't break anti-dependencies on non-allocatable registers. - AntiDepReg = 0; - else if (KeepRegs.count(AntiDepReg)) - // Don't break anti-dependencies if an use down below requires - // this exact register. - AntiDepReg = 0; - else { - // If the SUnit has other dependencies on the SUnit that it - // anti-depends on, don't bother breaking the anti-dependency - // since those edges would prevent such units from being - // scheduled past each other regardless. - // - // Also, if there are dependencies on other SUnits with the - // same register as the anti-dependency, don't attempt to - // break it. - for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(), - PE = CriticalPathSU->Preds.end(); P != PE; ++P) - if (P->getSUnit() == NextSU ? - (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) : - (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) { - AntiDepReg = 0; - break; - } - } - } - CriticalPathSU = NextSU; - CriticalPathMI = CriticalPathSU->getInstr(); - } else { - // We've reached the end of the critical path. - CriticalPathSU = 0; - CriticalPathMI = 0; - } - } - - PrescanInstruction(MI); - - if (MI->getDesc().hasExtraDefRegAllocReq()) - // If this instruction's defs have special allocation requirement, don't - // break this anti-dependency. - AntiDepReg = 0; - else if (AntiDepReg) { - // If this instruction has a use of AntiDepReg, breaking it - // is invalid. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) continue; - if (MO.isUse() && AntiDepReg == Reg) { - AntiDepReg = 0; - break; - } - } - } - - // Determine AntiDepReg's register class, if it is live and is - // consistently used within a single class. - const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0; - assert((AntiDepReg == 0 || RC != NULL) && - "Register should be live if it's causing an anti-dependence!"); - if (RC == reinterpret_cast(-1)) - AntiDepReg = 0; - - // Look for a suitable register to use to break the anti-depenence. - // - // TODO: Instead of picking the first free register, consider which might - // be the best. - if (AntiDepReg != 0) { - if (unsigned NewReg = findSuitableFreeRegister(AntiDepReg, - LastNewReg[AntiDepReg], - RC)) { - DEBUG(errs() << "Breaking anti-dependence edge on " - << TRI->getName(AntiDepReg) - << " with " << RegRefs.count(AntiDepReg) << " references" - << " using " << TRI->getName(NewReg) << "!\n"); - - // Update the references to the old register to refer to the new - // register. - std::pair::iterator, - std::multimap::iterator> - Range = RegRefs.equal_range(AntiDepReg); - for (std::multimap::iterator - Q = Range.first, QE = Range.second; Q != QE; ++Q) - Q->second->setReg(NewReg); - - // We just went back in time and modified history; the - // liveness information for the anti-depenence reg is now - // inconsistent. Set the state as if it were dead. - Classes[NewReg] = Classes[AntiDepReg]; - DefIndices[NewReg] = DefIndices[AntiDepReg]; - KillIndices[NewReg] = KillIndices[AntiDepReg]; - assert(((KillIndices[NewReg] == ~0u) != - (DefIndices[NewReg] == ~0u)) && - "Kill and Def maps aren't consistent for NewReg!"); - - Classes[AntiDepReg] = 0; - DefIndices[AntiDepReg] = KillIndices[AntiDepReg]; - KillIndices[AntiDepReg] = ~0u; - assert(((KillIndices[AntiDepReg] == ~0u) != - (DefIndices[AntiDepReg] == ~0u)) && - "Kill and Def maps aren't consistent for AntiDepReg!"); - - RegRefs.erase(AntiDepReg); - Changed = true; - LastNewReg[AntiDepReg] = NewReg; - } - } - - ScanInstruction(MI, Count); - } - - return Changed; -} - /// StartBlockForKills - Initialize register live-range state for updating kills /// void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 74781593a0d..e721a7fd680 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -130,7 +130,7 @@ protected: /// for Thumb1. bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, TargetSubtarget::AntiDepBreakMode& mode) const { - mode = TargetSubtarget::ANTIDEP_NONE; + mode = TargetSubtarget::ANTIDEP_CRITICAL; return PostRAScheduler && OptLevel >= CodeGenOpt::Default; } diff --git a/test/CodeGen/X86/break-anti-dependencies.ll b/test/CodeGen/X86/break-anti-dependencies.ll index 6b245c103e2..972b3cd43cf 100644 --- a/test/CodeGen/X86/break-anti-dependencies.ll +++ b/test/CodeGen/X86/break-anti-dependencies.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=x86-64 -post-RA-scheduler -break-anti-dependencies=false > %t +; RUN: llc < %s -march=x86-64 -post-RA-scheduler -break-anti-dependencies=none > %t ; RUN: grep {%xmm0} %t | count 14 ; RUN: not grep {%xmm1} %t -; RUN: llc < %s -march=x86-64 -post-RA-scheduler -break-anti-dependencies > %t +; RUN: llc < %s -march=x86-64 -post-RA-scheduler -break-anti-dependencies=critical > %t ; RUN: grep {%xmm0} %t | count 7 ; RUN: grep {%xmm1} %t | count 7 -- 2.11.0