1 //===- subzero/src/IceRegAlloc.cpp - Linear-scan implementation -----------===//
3 // The Subzero Code Generator
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief Implements the LinearScan class, which performs the linear-scan
12 /// register allocation after liveness analysis has been performed.
14 //===----------------------------------------------------------------------===//
16 #include "IceRegAlloc.h"
19 #include "IceCfgNode.h"
21 #include "IceInstVarIter.h"
22 #include "IceOperand.h"
23 #include "IceTargetLowering.h"
29 // Returns true if Var has any definitions within Item's live range.
30 // TODO(stichnot): Consider trimming the Definitions list similar to how the
31 // live ranges are trimmed, since all the overlapsDefs() tests are whether some
32 // variable's definitions overlap Cur, and trimming is with respect Cur.start.
33 // Initial tests show no measurable performance difference, so we'll keep the
34 // code simple for now.
35 bool overlapsDefs(const Cfg *Func, const Variable *Item, const Variable *Var) {
36 constexpr bool UseTrimmed = true;
37 VariablesMetadata *VMetadata = Func->getVMetadata();
38 if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var))
39 if (Item->getLiveRange().overlapsInst(FirstDef->getNumber(), UseTrimmed))
41 for (const Inst *Def : VMetadata->getLatterDefinitions(Var)) {
42 if (Item->getLiveRange().overlapsInst(Def->getNumber(), UseTrimmed))
48 void dumpDisableOverlap(const Cfg *Func, const Variable *Var,
50 if (!BuildDefs::dump())
52 if (!Func->isVerbose(IceV_LinearScan))
55 VariablesMetadata *VMetadata = Func->getVMetadata();
56 Ostream &Str = Func->getContext()->getStrDump();
57 Str << "Disabling Overlap due to " << Reason << " " << *Var
58 << " LIVE=" << Var->getLiveRange() << " Defs=";
59 if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var))
60 Str << FirstDef->getNumber();
61 const InstDefList &Defs = VMetadata->getLatterDefinitions(Var);
62 for (size_t i = 0; i < Defs.size(); ++i) {
63 Str << "," << Defs[i]->getNumber();
68 void dumpLiveRange(const Variable *Var, const Cfg *Func) {
69 if (!BuildDefs::dump())
71 Ostream &Str = Func->getContext()->getStrDump();
73 snprintf(buf, llvm::array_lengthof(buf), "%2d", Var->getRegNumTmp());
74 Str << "R=" << buf << " V=";
76 Str << " Range=" << Var->getLiveRange();
79 int32_t findMinWeightIndex(
80 const llvm::SmallBitVector &RegMask,
81 const llvm::SmallVector<RegWeight, LinearScan::REGS_SIZE> &Weights) {
82 int32_t MinWeightIndex = RegMask.find_first();
83 assert(MinWeightIndex >= 0);
84 for (int32_t i = RegMask.find_next(MinWeightIndex); i != -1;
85 i = RegMask.find_next(i)) {
86 if (Weights[i] < Weights[MinWeightIndex])
89 return MinWeightIndex;
92 } // end of anonymous namespace
94 LinearScan::LinearScan(Cfg *Func)
95 : Func(Func), Ctx(Func->getContext()), Target(Func->getTarget()),
96 Verbose(BuildDefs::dump() && Func->isVerbose(IceV_LinearScan)),
97 UseReserve(Ctx->getFlags().getRegAllocReserve()) {}
99 // Prepare for full register allocation of all variables. We depend on liveness
100 // analysis to have calculated live ranges.
101 void LinearScan::initForGlobal() {
102 TimerMarker T(TimerStack::TT_initUnhandled, Func);
103 FindPreference = true;
104 // For full register allocation, normally we want to enable FindOverlap
105 // (meaning we look for opportunities for two overlapping live ranges to
106 // safely share the same register). However, we disable it for phi-lowering
107 // register allocation since no overlap opportunities should be available and
108 // it's more expensive to look for opportunities.
109 FindOverlap = (Kind != RAK_Phi);
110 const VarList &Vars = Func->getVariables();
111 Unhandled.reserve(Vars.size());
112 UnhandledPrecolored.reserve(Vars.size());
113 // Gather the live ranges of all variables and add them to the Unhandled set.
114 for (Variable *Var : Vars) {
115 // Don't consider rematerializable variables.
116 if (Var->isRematerializable())
118 // Explicitly don't consider zero-weight variables, which are meant to be
120 if (Var->mustNotHaveReg())
122 // Don't bother if the variable has a null live range, which means it was
124 if (Var->getLiveRange().isEmpty())
126 Var->untrimLiveRange();
127 Unhandled.push_back(Var);
129 Var->setRegNumTmp(Var->getRegNum());
130 Var->setMustHaveReg();
131 UnhandledPrecolored.push_back(Var);
135 // Build the (ordered) list of FakeKill instruction numbers.
137 // Phi lowering should not be creating new call instructions, so there should
138 // be no infinite-weight not-yet-colored live ranges that span a call
139 // instruction, hence no need to construct the Kills list.
142 for (CfgNode *Node : Func->getNodes()) {
143 for (Inst &I : Node->getInsts()) {
144 if (auto *Kill = llvm::dyn_cast<InstFakeKill>(&I)) {
145 if (!Kill->isDeleted() && !Kill->getLinked()->isDeleted())
146 Kills.push_back(I.getNumber());
152 // Validate the integrity of the live ranges. If there are any errors, it
153 // prints details and returns false. On success, it returns true.
154 bool LinearScan::livenessValidateIntervals(
155 const DefUseErrorList &DefsWithoutUses,
156 const DefUseErrorList &UsesBeforeDefs,
157 const CfgVector<InstNumberT> &LRBegin,
158 const CfgVector<InstNumberT> &LREnd) const {
159 if (DefsWithoutUses.empty() && UsesBeforeDefs.empty())
162 if (!BuildDefs::dump())
165 const VarList &Vars = Func->getVariables();
166 OstreamLocker L(Ctx);
167 Ostream &Str = Ctx->getStrDump();
168 for (SizeT VarNum : DefsWithoutUses) {
169 Variable *Var = Vars[VarNum];
170 Str << "LR def without use, instruction " << LRBegin[VarNum]
171 << ", variable " << Var->getName(Func) << "\n";
173 for (SizeT VarNum : UsesBeforeDefs) {
174 Variable *Var = Vars[VarNum];
175 Str << "LR use before def, instruction " << LREnd[VarNum] << ", variable "
176 << Var->getName(Func) << "\n";
181 // Prepare for very simple register allocation of only infinite-weight Variables
182 // while respecting pre-colored Variables. Some properties we take advantage of:
184 // * Live ranges of interest consist of a single segment.
186 // * Live ranges of interest never span a call instruction.
188 // * Phi instructions are not considered because either phis have already been
189 // lowered, or they don't contain any pre-colored or infinite-weight
192 // * We don't need to renumber instructions before computing live ranges because
193 // all the high-level ICE instructions are deleted prior to lowering, and the
194 // low-level instructions are added in monotonically increasing order.
196 // * There are no opportunities for register preference or allowing overlap.
198 // Some properties we aren't (yet) taking advantage of:
200 // * Because live ranges are a single segment, the Inactive set will always be
201 // empty, and the live range trimming operation is unnecessary.
203 // * Calculating overlap of single-segment live ranges could be optimized a bit.
204 void LinearScan::initForInfOnly() {
205 TimerMarker T(TimerStack::TT_initUnhandled, Func);
206 FindPreference = false;
209 const VarList &Vars = Func->getVariables();
211 // Iterate across all instructions and record the begin and end of the live
212 // range for each variable that is pre-colored or infinite weight.
213 CfgVector<InstNumberT> LRBegin(Vars.size(), Inst::NumberSentinel);
214 CfgVector<InstNumberT> LREnd(Vars.size(), Inst::NumberSentinel);
215 DefUseErrorList DefsWithoutUses, UsesBeforeDefs;
216 for (CfgNode *Node : Func->getNodes()) {
217 for (Inst &Inst : Node->getInsts()) {
218 if (Inst.isDeleted())
220 FOREACH_VAR_IN_INST(Var, Inst) {
221 if (Var->isRematerializable())
223 if (Var->getIgnoreLiveness())
225 if (Var->hasReg() || Var->mustHaveReg()) {
226 SizeT VarNum = Var->getIndex();
227 LREnd[VarNum] = Inst.getNumber();
228 if (!Var->getIsArg() && LRBegin[VarNum] == Inst::NumberSentinel)
229 UsesBeforeDefs.push_back(VarNum);
232 if (const Variable *Var = Inst.getDest()) {
233 if (!Var->isRematerializable() && !Var->getIgnoreLiveness() &&
234 (Var->hasReg() || Var->mustHaveReg())) {
235 if (LRBegin[Var->getIndex()] == Inst::NumberSentinel) {
236 LRBegin[Var->getIndex()] = Inst.getNumber();
244 Unhandled.reserve(NumVars);
245 UnhandledPrecolored.reserve(NumVars);
246 for (SizeT i = 0; i < Vars.size(); ++i) {
247 Variable *Var = Vars[i];
248 if (Var->isRematerializable())
250 if (LRBegin[i] != Inst::NumberSentinel) {
251 if (LREnd[i] == Inst::NumberSentinel) {
252 DefsWithoutUses.push_back(i);
255 Unhandled.push_back(Var);
256 Var->resetLiveRange();
257 Var->addLiveRange(LRBegin[i], LREnd[i]);
258 Var->untrimLiveRange();
260 Var->setRegNumTmp(Var->getRegNum());
261 Var->setMustHaveReg();
262 UnhandledPrecolored.push_back(Var);
268 if (!livenessValidateIntervals(DefsWithoutUses, UsesBeforeDefs, LRBegin,
270 llvm::report_fatal_error("initForInfOnly: Liveness error");
274 if (!DefsWithoutUses.empty() || !UsesBeforeDefs.empty()) {
275 if (BuildDefs::dump()) {
276 OstreamLocker L(Ctx);
277 Ostream &Str = Ctx->getStrDump();
278 for (SizeT VarNum : DefsWithoutUses) {
279 Variable *Var = Vars[VarNum];
280 Str << "LR def without use, instruction " << LRBegin[VarNum]
281 << ", variable " << Var->getName(Func) << "\n";
283 for (SizeT VarNum : UsesBeforeDefs) {
284 Variable *Var = Vars[VarNum];
285 Str << "LR use before def, instruction " << LREnd[VarNum]
286 << ", variable " << Var->getName(Func) << "\n";
289 llvm::report_fatal_error("initForInfOnly: Liveness error");
291 // This isn't actually a fatal condition, but it would be nice to know if we
292 // somehow pre-calculated Unhandled's size wrong.
293 assert(NumVars == 0);
295 // Don't build up the list of Kills because we know that no infinite-weight
296 // Variable has a live range spanning a call.
300 void LinearScan::initForSecondChance() {
301 TimerMarker T(TimerStack::TT_initUnhandled, Func);
302 FindPreference = true;
304 const VarList &Vars = Func->getVariables();
305 Unhandled.reserve(Vars.size());
306 UnhandledPrecolored.reserve(Vars.size());
307 for (Variable *Var : Vars) {
308 if (Var->isRematerializable())
311 Var->untrimLiveRange();
312 Var->setRegNumTmp(Var->getRegNum());
313 Var->setMustHaveReg();
314 UnhandledPrecolored.push_back(Var);
315 Unhandled.push_back(Var);
318 for (Variable *Var : Evicted) {
319 Var->untrimLiveRange();
320 Unhandled.push_back(Var);
324 void LinearScan::init(RegAllocKind Kind) {
327 UnhandledPrecolored.clear();
332 SizeT NumRegs = Target->getNumRegisters();
333 RegAliases.resize(NumRegs);
334 for (SizeT Reg = 0; Reg < NumRegs; ++Reg) {
335 RegAliases[Reg] = &Target->getAliasesForRegister(Reg);
340 llvm::report_fatal_error("Invalid RAK_Unknown");
349 case RAK_SecondChance:
350 initForSecondChance();
356 auto CompareRanges = [](const Variable *L, const Variable *R) {
357 InstNumberT Lstart = L->getLiveRange().getStart();
358 InstNumberT Rstart = R->getLiveRange().getStart();
359 if (Lstart == Rstart)
360 return L->getIndex() < R->getIndex();
361 return Lstart < Rstart;
363 // Do a reverse sort so that erasing elements (from the end) is fast.
364 std::sort(Unhandled.rbegin(), Unhandled.rend(), CompareRanges);
365 std::sort(UnhandledPrecolored.rbegin(), UnhandledPrecolored.rend(),
368 Handled.reserve(Unhandled.size());
369 Inactive.reserve(Unhandled.size());
370 Active.reserve(Unhandled.size());
371 Evicted.reserve(Unhandled.size());
374 // This is called when Cur must be allocated a register but no registers are
375 // available across Cur's live range. To handle this, we find a register that is
376 // not explicitly used during Cur's live range, spill that register to a stack
377 // location right before Cur's live range begins, and fill (reload) the register
378 // from the stack location right after Cur's live range ends.
379 void LinearScan::addSpillFill(IterationState &Iter) {
380 // Identify the actual instructions that begin and end Iter.Cur's live range.
381 // Iterate through Iter.Cur's node's instruction list until we find the actual
382 // instructions with instruction numbers corresponding to Iter.Cur's recorded
383 // live range endpoints. This sounds inefficient but shouldn't be a problem
384 // in practice because:
385 // (1) This function is almost never called in practice.
386 // (2) Since this register over-subscription problem happens only for
387 // phi-lowered instructions, the number of instructions in the node is
388 // proportional to the number of phi instructions in the original node,
389 // which is never very large in practice.
390 // (3) We still have to iterate through all instructions of Iter.Cur's live
391 // range to find all explicitly used registers (though the live range is
392 // usually only 2-3 instructions), so the main cost that could be avoided
393 // would be finding the instruction that begin's Iter.Cur's live range.
394 assert(!Iter.Cur->getLiveRange().isEmpty());
395 InstNumberT Start = Iter.Cur->getLiveRange().getStart();
396 InstNumberT End = Iter.Cur->getLiveRange().getEnd();
397 CfgNode *Node = Func->getVMetadata()->getLocalUseNode(Iter.Cur);
399 InstList &Insts = Node->getInsts();
400 InstList::iterator SpillPoint = Insts.end();
401 InstList::iterator FillPoint = Insts.end();
402 // Stop searching after we have found both the SpillPoint and the FillPoint.
403 for (auto I = Insts.begin(), E = Insts.end();
404 I != E && (SpillPoint == E || FillPoint == E); ++I) {
405 if (I->getNumber() == Start)
407 if (I->getNumber() == End)
409 if (SpillPoint != E) {
410 // Remove from RegMask any physical registers referenced during Cur's live
411 // range. Start looking after SpillPoint gets set, i.e. once Cur's live
413 FOREACH_VAR_IN_INST(Var, *I) {
414 if (!Var->hasRegTmp())
416 const llvm::SmallBitVector &Aliases = *RegAliases[Var->getRegNumTmp()];
417 for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0;
418 RegAlias = Aliases.find_next(RegAlias)) {
419 Iter.RegMask[RegAlias] = false;
424 assert(SpillPoint != Insts.end());
425 assert(FillPoint != Insts.end());
427 // TODO(stichnot): Randomize instead of find_first().
428 int32_t RegNum = Iter.RegMask.find_first();
429 assert(RegNum != -1);
430 Iter.Cur->setRegNumTmp(RegNum);
431 Variable *Preg = Target->getPhysicalRegister(RegNum, Iter.Cur->getType());
432 // TODO(stichnot): Add SpillLoc to VariablesMetadata tracking so that SpillLoc
433 // is correctly identified as !isMultiBlock(), reducing stack frame size.
434 Variable *SpillLoc = Func->makeVariable(Iter.Cur->getType());
435 // Add "reg=FakeDef;spill=reg" before SpillPoint
436 Target->lowerInst(Node, SpillPoint, InstFakeDef::create(Func, Preg));
437 Target->lowerInst(Node, SpillPoint, InstAssign::create(Func, SpillLoc, Preg));
438 // add "reg=spill;FakeUse(reg)" before FillPoint
439 Target->lowerInst(Node, FillPoint, InstAssign::create(Func, Preg, SpillLoc));
440 Target->lowerInst(Node, FillPoint, InstFakeUse::create(Func, Preg));
443 void LinearScan::handleActiveRangeExpiredOrInactive(const Variable *Cur) {
444 for (SizeT I = Active.size(); I > 0; --I) {
445 const SizeT Index = I - 1;
446 Variable *Item = Active[Index];
447 Item->trimLiveRange(Cur->getLiveRange().getStart());
449 if (Item->rangeEndsBefore(Cur)) {
450 // Move Item from Active to Handled list.
451 dumpLiveRangeTrace("Expiring ", Item);
452 moveItem(Active, Index, Handled);
454 } else if (!Item->rangeOverlapsStart(Cur)) {
455 // Move Item from Active to Inactive list.
456 dumpLiveRangeTrace("Inactivating ", Item);
457 moveItem(Active, Index, Inactive);
461 // Decrement Item from RegUses[].
462 assert(Item->hasRegTmp());
463 const llvm::SmallBitVector &Aliases = *RegAliases[Item->getRegNumTmp()];
464 for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0;
465 RegAlias = Aliases.find_next(RegAlias)) {
467 assert(RegUses[RegAlias] >= 0);
473 void LinearScan::handleInactiveRangeExpiredOrReactivated(const Variable *Cur) {
474 for (SizeT I = Inactive.size(); I > 0; --I) {
475 const SizeT Index = I - 1;
476 Variable *Item = Inactive[Index];
477 Item->trimLiveRange(Cur->getLiveRange().getStart());
478 if (Item->rangeEndsBefore(Cur)) {
479 // Move Item from Inactive to Handled list.
480 dumpLiveRangeTrace("Expiring ", Item);
481 moveItem(Inactive, Index, Handled);
482 } else if (Item->rangeOverlapsStart(Cur)) {
483 // Move Item from Inactive to Active list.
484 dumpLiveRangeTrace("Reactivating ", Item);
485 moveItem(Inactive, Index, Active);
486 // Increment Item in RegUses[].
487 assert(Item->hasRegTmp());
488 const llvm::SmallBitVector &Aliases = *RegAliases[Item->getRegNumTmp()];
489 for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0;
490 RegAlias = Aliases.find_next(RegAlias)) {
491 assert(RegUses[RegAlias] >= 0);
498 // Infer register preference and allowable overlap. Only form a preference when
499 // the current Variable has an unambiguous "first" definition. The preference is
500 // some source Variable of the defining instruction that either is assigned a
501 // register that is currently free, or that is assigned a register that is not
502 // free but overlap is allowed. Overlap is allowed when the Variable under
503 // consideration is single-definition, and its definition is a simple assignment
504 // - i.e., the register gets copied/aliased but is never modified. Furthermore,
505 // overlap is only allowed when preferred Variable definition instructions do
506 // not appear within the current Variable's live range.
507 void LinearScan::findRegisterPreference(IterationState &Iter) {
508 Iter.Prefer = nullptr;
509 Iter.PreferReg = Variable::NoRegister;
510 Iter.AllowOverlap = false;
515 VariablesMetadata *VMetadata = Func->getVMetadata();
516 const Inst *DefInst = VMetadata->getFirstDefinitionSingleBlock(Iter.Cur);
517 if (DefInst == nullptr)
520 assert(DefInst->getDest() == Iter.Cur);
521 const bool IsSingleDefAssign =
522 DefInst->isVarAssign() && !VMetadata->isMultiDef(Iter.Cur);
523 FOREACH_VAR_IN_INST(SrcVar, *DefInst) {
524 // Only consider source variables that have (so far) been assigned a
526 if (!SrcVar->hasRegTmp())
529 // That register must be one in the RegMask set, e.g. don't try to prefer
530 // the stack pointer as a result of the stacksave intrinsic.
531 const llvm::SmallBitVector &Aliases = *RegAliases[SrcVar->getRegNumTmp()];
532 const int32_t SrcReg = (Iter.RegMask & Aliases).find_first();
536 if (FindOverlap && IsSingleDefAssign && !Iter.Free[SrcReg]) {
537 // Don't bother trying to enable AllowOverlap if the register is already
538 // free (hence the test on Iter.Free[SrcReg]).
539 Iter.AllowOverlap = !overlapsDefs(Func, Iter.Cur, SrcVar);
541 if (Iter.AllowOverlap || Iter.Free[SrcReg]) {
542 Iter.Prefer = SrcVar;
543 Iter.PreferReg = SrcReg;
544 // Stop looking for a preference after the first valid preference is
545 // found. One might think that we should look at all instruction
546 // variables to find the best <Prefer,AllowOverlap> combination, but note
547 // that AllowOverlap can only be true for a simple assignment statement
548 // which can have only one source operand, so it's not possible for
549 // AllowOverlap to be true beyond the first source operand.
550 FOREACH_VAR_IN_INST_BREAK;
553 if (BuildDefs::dump() && Verbose && Iter.Prefer) {
554 Ostream &Str = Ctx->getStrDump();
555 Str << "Initial Iter.Prefer=";
556 Iter.Prefer->dump(Func);
557 Str << " R=" << Iter.PreferReg << " LIVE=" << Iter.Prefer->getLiveRange()
558 << " Overlap=" << Iter.AllowOverlap << "\n";
562 // Remove registers from the Iter.Free[] list where an Inactive range overlaps
563 // with the current range.
564 void LinearScan::filterFreeWithInactiveRanges(IterationState &Iter) {
565 for (const Variable *Item : Inactive) {
566 if (!Item->rangeOverlaps(Iter.Cur))
568 const llvm::SmallBitVector &Aliases = *RegAliases[Item->getRegNumTmp()];
569 // TODO(stichnot): Do this with bitvector ops, not a loop, for efficiency.
570 for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0;
571 RegAlias = Aliases.find_next(RegAlias)) {
572 // Don't assert(Iter.Free[RegNum]) because in theory (though probably
573 // never in practice) there could be two inactive variables that were
574 // marked with AllowOverlap.
575 Iter.Free[RegAlias] = false;
576 Iter.FreeUnfiltered[RegAlias] = false;
577 // Disable AllowOverlap if an Inactive variable, which is not Prefer,
578 // shares Prefer's register, and has a definition within Cur's live range.
579 if (Iter.AllowOverlap && Item != Iter.Prefer &&
580 RegAlias == Iter.PreferReg && overlapsDefs(Func, Iter.Cur, Item)) {
581 Iter.AllowOverlap = false;
582 dumpDisableOverlap(Func, Item, "Inactive");
588 // Remove registers from the Iter.Free[] list where an Unhandled pre-colored
589 // range overlaps with the current range, and set those registers to infinite
590 // weight so that they aren't candidates for eviction.
591 // Cur->rangeEndsBefore(Item) is an early exit check that turns a guaranteed
592 // O(N^2) algorithm into expected linear complexity.
593 void LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) {
594 // TODO(stichnot): Partition UnhandledPrecolored according to register class,
595 // to restrict the number of overlap comparisons needed.
596 for (Variable *Item : reverse_range(UnhandledPrecolored)) {
597 assert(Item->hasReg());
598 if (Iter.Cur->rangeEndsBefore(Item))
600 if (!Item->rangeOverlaps(Iter.Cur))
602 const llvm::SmallBitVector &Aliases =
603 *RegAliases[Item->getRegNum()]; // Note: not getRegNumTmp()
604 for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0;
605 RegAlias = Aliases.find_next(RegAlias)) {
606 Iter.Weights[RegAlias].setWeight(RegWeight::Inf);
607 Iter.Free[RegAlias] = false;
608 Iter.FreeUnfiltered[RegAlias] = false;
609 Iter.PrecoloredUnhandledMask[RegAlias] = true;
610 // Disable Iter.AllowOverlap if the preferred register is one of these
611 // pre-colored unhandled overlapping ranges.
612 if (Iter.AllowOverlap && RegAlias == Iter.PreferReg) {
613 Iter.AllowOverlap = false;
614 dumpDisableOverlap(Func, Item, "PrecoloredUnhandled");
620 void LinearScan::allocatePrecoloredRegister(Variable *Cur) {
621 int32_t RegNum = Cur->getRegNum();
622 // RegNumTmp should have already been set above.
623 assert(Cur->getRegNumTmp() == RegNum);
624 dumpLiveRangeTrace("Precoloring ", Cur);
625 Active.push_back(Cur);
626 const llvm::SmallBitVector &Aliases = *RegAliases[RegNum];
627 for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0;
628 RegAlias = Aliases.find_next(RegAlias)) {
629 assert(RegUses[RegAlias] >= 0);
632 assert(!UnhandledPrecolored.empty());
633 assert(UnhandledPrecolored.back() == Cur);
634 UnhandledPrecolored.pop_back();
637 void LinearScan::allocatePreferredRegister(IterationState &Iter) {
638 Iter.Cur->setRegNumTmp(Iter.PreferReg);
639 dumpLiveRangeTrace("Preferring ", Iter.Cur);
640 const llvm::SmallBitVector &Aliases = *RegAliases[Iter.PreferReg];
641 for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0;
642 RegAlias = Aliases.find_next(RegAlias)) {
643 assert(RegUses[RegAlias] >= 0);
646 Active.push_back(Iter.Cur);
649 void LinearScan::allocateFreeRegister(IterationState &Iter, bool Filtered) {
650 const int32_t RegNum =
651 Filtered ? Iter.Free.find_first() : Iter.FreeUnfiltered.find_first();
652 Iter.Cur->setRegNumTmp(RegNum);
654 dumpLiveRangeTrace("Allocating ", Iter.Cur);
656 dumpLiveRangeTrace("Allocating X ", Iter.Cur);
657 const llvm::SmallBitVector &Aliases = *RegAliases[RegNum];
658 for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0;
659 RegAlias = Aliases.find_next(RegAlias)) {
660 assert(RegUses[RegAlias] >= 0);
663 Active.push_back(Iter.Cur);
666 void LinearScan::handleNoFreeRegisters(IterationState &Iter) {
667 // Check Active ranges.
668 for (const Variable *Item : Active) {
669 assert(Item->rangeOverlaps(Iter.Cur));
670 assert(Item->hasRegTmp());
671 const llvm::SmallBitVector &Aliases = *RegAliases[Item->getRegNumTmp()];
672 // We add the Item's weight to each alias/subregister to represent that,
673 // should we decide to pick any of them, then we would incur that many
675 RegWeight W = Item->getWeight(Func);
676 for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0;
677 RegAlias = Aliases.find_next(RegAlias)) {
678 Iter.Weights[RegAlias].addWeight(W);
681 // Same as above, but check Inactive ranges instead of Active.
682 for (const Variable *Item : Inactive) {
683 if (!Item->rangeOverlaps(Iter.Cur))
685 assert(Item->hasRegTmp());
686 const llvm::SmallBitVector &Aliases = *RegAliases[Item->getRegNumTmp()];
687 RegWeight W = Item->getWeight(Func);
688 for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0;
689 RegAlias = Aliases.find_next(RegAlias)) {
690 Iter.Weights[RegAlias].addWeight(W);
694 // All the weights are now calculated. Find the register with smallest weight.
695 int32_t MinWeightIndex = findMinWeightIndex(Iter.RegMask, Iter.Weights);
697 if (Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
698 if (!Iter.Cur->mustHaveReg()) {
699 // Iter.Cur doesn't have priority over any other live ranges, so don't
700 // allocate any register to it, and move it to the Handled state.
701 Handled.push_back(Iter.Cur);
704 if (Kind == RAK_Phi) {
705 // Iter.Cur is infinite-weight but all physical registers are already
706 // taken, so we need to force one to be temporarily available.
708 Handled.push_back(Iter.Cur);
711 // The remaining portion of the enclosing "if" block should only be
712 // reachable if we are manually limiting physical registers for testing.
714 if (Iter.FreeUnfiltered.any()) {
715 // There is some available physical register held in reserve, so use it.
716 constexpr bool NotFiltered = false;
717 allocateFreeRegister(Iter, NotFiltered);
718 // Iter.Cur is now on the Active list.
721 // At this point, we need to find some reserve register that is already
722 // assigned to a non-infinite-weight variable. This could happen if some
723 // variable was previously assigned an alias of such a register.
724 MinWeightIndex = findMinWeightIndex(Iter.RegMaskUnfiltered, Iter.Weights);
726 if (Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
727 dumpLiveRangeTrace("Failing ", Iter.Cur);
728 Func->setError("Unable to find a physical register for an "
729 "infinite-weight live range "
730 "(consider using -reg-reserve): " +
731 Iter.Cur->getName(Func));
732 Handled.push_back(Iter.Cur);
735 // At this point, MinWeightIndex points to a valid reserve register to
736 // reallocate to Iter.Cur, so drop into the eviction code.
739 // Evict all live ranges in Active that register number MinWeightIndex is
741 const llvm::SmallBitVector &Aliases = *RegAliases[MinWeightIndex];
742 for (SizeT I = Active.size(); I > 0; --I) {
743 const SizeT Index = I - 1;
744 Variable *Item = Active[Index];
745 int32_t RegNum = Item->getRegNumTmp();
746 if (Aliases[RegNum]) {
747 dumpLiveRangeTrace("Evicting A ", Item);
748 const llvm::SmallBitVector &Aliases = *RegAliases[RegNum];
749 for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0;
750 RegAlias = Aliases.find_next(RegAlias)) {
752 assert(RegUses[RegAlias] >= 0);
754 Item->setRegNumTmp(Variable::NoRegister);
755 moveItem(Active, Index, Handled);
756 Evicted.push_back(Item);
759 // Do the same for Inactive.
760 for (SizeT I = Inactive.size(); I > 0; --I) {
761 const SizeT Index = I - 1;
762 Variable *Item = Inactive[Index];
763 // Note: The Item->rangeOverlaps(Cur) clause is not part of the description
764 // of AssignMemLoc() in the original paper. But there doesn't seem to be any
765 // need to evict an inactive live range that doesn't overlap with the live
766 // range currently being considered. It's especially bad if we would end up
767 // evicting an infinite-weight but currently-inactive live range. The most
768 // common situation for this would be a scratch register kill set for call
770 if (Aliases[Item->getRegNumTmp()] && Item->rangeOverlaps(Iter.Cur)) {
771 dumpLiveRangeTrace("Evicting I ", Item);
772 Item->setRegNumTmp(Variable::NoRegister);
773 moveItem(Inactive, Index, Handled);
774 Evicted.push_back(Item);
777 // Assign the register to Cur.
778 Iter.Cur->setRegNumTmp(MinWeightIndex);
779 for (int32_t RegAlias = Aliases.find_first(); RegAlias >= 0;
780 RegAlias = Aliases.find_next(RegAlias)) {
781 assert(RegUses[RegAlias] >= 0);
784 Active.push_back(Iter.Cur);
785 dumpLiveRangeTrace("Allocating ", Iter.Cur);
788 void LinearScan::assignFinalRegisters(
789 const llvm::SmallBitVector &RegMaskFull,
790 const llvm::SmallBitVector &PreDefinedRegisters, bool Randomized) {
791 const size_t NumRegisters = RegMaskFull.size();
792 llvm::SmallVector<int32_t, REGS_SIZE> Permutation(NumRegisters);
794 // Create a random number generator for regalloc randomization. Merge
795 // function's sequence and Kind value as the Salt. Because regAlloc() is
796 // called twice under O2, the second time with RAK_Phi, we check Kind ==
797 // RAK_Phi to determine the lowest-order bit to make sure the Salt is
800 (Func->getSequenceNumber() << 1) ^ (Kind == RAK_Phi ? 0u : 1u);
801 Target->makeRandomRegisterPermutation(
802 Permutation, PreDefinedRegisters | ~RegMaskFull, Salt);
805 // Finish up by setting RegNum = RegNumTmp (or a random permutation thereof)
806 // for each Variable.
807 for (Variable *Item : Handled) {
808 int32_t RegNum = Item->getRegNumTmp();
809 int32_t AssignedRegNum = RegNum;
811 if (Randomized && Item->hasRegTmp() && !Item->hasReg()) {
812 AssignedRegNum = Permutation[RegNum];
814 if (BuildDefs::dump() && Verbose) {
815 Ostream &Str = Ctx->getStrDump();
816 if (!Item->hasRegTmp()) {
817 Str << "Not assigning ";
821 Str << (AssignedRegNum == Item->getRegNum() ? "Reassigning "
823 << Target->getRegName(AssignedRegNum, Item->getType()) << "(r"
824 << AssignedRegNum << ") to ";
829 Item->setRegNum(AssignedRegNum);
833 // Implements the linear-scan algorithm. Based on "Linear Scan Register
834 // Allocation in the Context of SSA Form and Register Constraints" by Hanspeter
835 // Mössenböck and Michael Pfeiffer,
836 // ftp://ftp.ssw.uni-linz.ac.at/pub/Papers/Moe02.PDF. This implementation is
837 // modified to take affinity into account and allow two interfering variables to
838 // share the same register in certain cases.
840 // Requires running Cfg::liveness(Liveness_Intervals) in preparation. Results
841 // are assigned to Variable::RegNum for each Variable.
//
// RegMaskFull selects which physical registers are eligible at all; its size
// defines the universe of register numbers used throughout the pass.
//
// NOTE(review): this listing is elided (original line numbers skip), so some
// statements are not visible here -- e.g. additional parameters of scan()
// (the Randomized flag used at the bottom), closing braces, and the continue
// after allocatePrecoloredRegister(). Comments below describe only what the
// visible lines establish.
842 void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull,
844 TimerMarker T(TimerStack::TT_linearScan, Func);
845 assert(RegMaskFull.any()); // Sanity check
848 Func->resetCurrentNode();
849 const size_t NumRegisters = RegMaskFull.size();
// Record which registers are claimed by pre-colored unhandled ranges; this
// set is handed to assignFinalRegisters() at the end of the pass.
850 llvm::SmallBitVector PreDefinedRegisters(NumRegisters);
852 for (Variable *Var : UnhandledPrecolored) {
853 PreDefinedRegisters[Var->getRegNum()] = true;
857 // Build a LiveRange representing the Kills list.
858 LiveRange KillsRange(Kills);
861 // Reset the register use count.
862 RegUses.resize(NumRegisters);
863 std::fill(RegUses.begin(), RegUses.end(), 0);
865 // Unhandled is already set to all ranges in increasing order of start points.
866 assert(Active.empty());
867 assert(Inactive.empty());
868 assert(Handled.empty());
// The "kills" set is the caller-save registers: these are clobbered at call
// sites, so KillsMask is used below to exclude them from ranges that span a
// kill point.
869 const TargetLowering::RegSetMask RegsInclude =
870 TargetLowering::RegSet_CallerSave;
871 const TargetLowering::RegSetMask RegsExclude = TargetLowering::RegSet_None;
872 const llvm::SmallBitVector KillsMask =
873 Target->getRegisterSet(RegsInclude, RegsExclude);
875 // Allocate memory once outside the loop.
877 Iter.Weights.reserve(NumRegisters);
878 Iter.PrecoloredUnhandledMask.reserve(NumRegisters);
// Main linear-scan loop: take the next unhandled live range (popped from the
// back of Unhandled) and either honor its pre-color or pick a register for it.
880 while (!Unhandled.empty()) {
881 Iter.Cur = Unhandled.back();
882 Unhandled.pop_back();
883 dumpLiveRangeTrace("\nConsidering ", Iter.Cur);
884 assert(Target->getRegistersForVariable(Iter.Cur).any());
// RegMask: registers this variable's class may use; RegMaskUnfiltered is the
// wider set from getAllRegistersForVariable() -- presumably ignoring a
// class/filter restriction (TODO confirm against TargetLowering).
885 Iter.RegMask = RegMaskFull & Target->getRegistersForVariable(Iter.Cur);
886 Iter.RegMaskUnfiltered =
887 RegMaskFull & Target->getAllRegistersForVariable(Iter.Cur);
// Advance KillsRange past segments that end before Cur starts, so later
// overlap tests only examine relevant segments.
888 KillsRange.trim(Iter.Cur->getLiveRange().getStart());
890 // Check for pre-colored ranges. If Cur is pre-colored, it definitely gets
891 // that register. Previously processed live ranges would have avoided that
892 // register due to it being pre-colored. Future processed live ranges won't
893 // evict that register because the live range has infinite weight.
894 if (Iter.Cur->hasReg()) {
895 allocatePrecoloredRegister(Iter.Cur);
// Retire or deactivate ranges that ended or went inactive before Cur starts.
899 handleActiveRangeExpiredOrInactive(Iter.Cur);
900 handleInactiveRangeExpiredOrReactivated(Iter.Cur);
902 // Calculate available registers into Iter.Free[] and Iter.FreeUnfiltered[].
903 Iter.Free = Iter.RegMask;
904 Iter.FreeUnfiltered = Iter.RegMaskUnfiltered;
905 for (SizeT i = 0; i < Iter.RegMask.size(); ++i) {
// A nonzero use count means register i is currently occupied.
906 if (RegUses[i] > 0) {
907 Iter.Free[i] = false;
908 Iter.FreeUnfiltered[i] = false;
// Compute Iter.Prefer/PreferReg/AllowOverlap, then drop registers whose
// inactive ranges overlap Cur.
912 findRegisterPreference(Iter);
913 filterFreeWithInactiveRanges(Iter);
915 // Disable AllowOverlap if an Active variable, which is not Prefer, shares
916 // Prefer's register, and has a definition within Cur's live range.
917 if (Iter.AllowOverlap) {
918 const llvm::SmallBitVector &Aliases = *RegAliases[Iter.PreferReg];
919 for (const Variable *Item : Active) {
920 int32_t RegNum = Item->getRegNumTmp();
921 if (Item != Iter.Prefer && Aliases[RegNum] &&
922 overlapsDefs(Func, Iter.Cur, Item)) {
923 Iter.AllowOverlap = false;
924 dumpDisableOverlap(Func, Item, "Active");
// Per-candidate eviction weights, cleared for each Cur.
929 Iter.Weights.resize(Iter.RegMask.size());
930 std::fill(Iter.Weights.begin(), Iter.Weights.end(), RegWeight());
932 Iter.PrecoloredUnhandledMask.resize(Iter.RegMask.size());
933 Iter.PrecoloredUnhandledMask.reset();
935 filterFreeWithPrecoloredRanges(Iter);
937 // Remove scratch registers from the Iter.Free[] list, and mark their
938 // Iter.Weights[] as infinite, if KillsRange overlaps Cur's live range.
939 constexpr bool UseTrimmed = true;
940 if (Iter.Cur->getLiveRange().overlaps(KillsRange, UseTrimmed)) {
941 Iter.Free.reset(KillsMask);
942 Iter.FreeUnfiltered.reset(KillsMask);
943 for (int i = KillsMask.find_first(); i != -1;
944 i = KillsMask.find_next(i)) {
945 Iter.Weights[i].setWeight(RegWeight::Inf);
// The preferred register is caller-save and Cur spans a kill point, so
// sharing it via overlap is no longer viable.
946 if (Iter.PreferReg == i)
947 Iter.AllowOverlap = false;
951 // Print info about physical register availability.
952 if (BuildDefs::dump() && Verbose) {
953 Ostream &Str = Ctx->getStrDump();
954 for (SizeT i = 0; i < Iter.RegMask.size(); ++i) {
955 if (Iter.RegMaskUnfiltered[i]) {
956 Str << Target->getRegName(i, Iter.Cur->getType())
957 << "(U=" << RegUses[i] << ",F=" << Iter.Free[i]
958 << ",P=" << Iter.PrecoloredUnhandledMask[i] << ") ";
// Allocation decision, in priority order: preferred register, then any free
// register, then weight-based eviction.
964 if (Iter.Prefer && (Iter.AllowOverlap || Iter.Free[Iter.PreferReg])) {
965 // First choice: a preferred register that is either free or is allowed to
966 // overlap with its linked variable.
967 allocatePreferredRegister(Iter);
968 } else if (Iter.Free.any()) {
969 // Second choice: any free register.
970 constexpr bool Filtered = true;
971 allocateFreeRegister(Iter, Filtered);
973 // Fallback: there are no free registers, so we look for the lowest-weight
974 // register and see if Cur has higher weight.
975 handleNoFreeRegisters(Iter);
980 // Move anything Active or Inactive to Handled for easier handling.
981 Handled.insert(Handled.end(), Active.begin(), Active.end());
983 Handled.insert(Handled.end(), Inactive.begin(), Inactive.end());
// Commit tentative assignments to Variable::RegNum. PreDefinedRegisters is
// the pre-colored set built at the top; Randomized comes from an elided
// parameter of scan() -- TODO confirm against the full signature.
987 assignFinalRegisters(RegMaskFull, PreDefinedRegisters, Randomized);
993 // ======================== Dump routines ======================== //
// Debug helper used by scan(): writes Item's live range to the dump stream.
// Presumably the (elided) lines also print Label and return early when
// linear-scan verbosity is off -- only the dump-build guard is visible here.
// No-op in non-dump builds.
995 void LinearScan::dumpLiveRangeTrace(const char *Label, const Variable *Item) {
996 if (!BuildDefs::dump())
1000 Ostream &Str = Ctx->getStrDump();
1002 dumpLiveRange(Item, Func);
// Dumps the full allocator state -- the Handled, Unhandled, Active, and
// Inactive lists, one live range per entry -- to the Cfg's dump stream.
// No-op in non-dump builds.
1007 void LinearScan::dump(Cfg *Func) const {
1008 if (!BuildDefs::dump())
1012 Ostream &Str = Func->getContext()->getStrDump();
1013 Func->resetCurrentNode();
1014 Str << "**** Current regalloc state:\n";
1015 Str << "++++++ Handled:\n";
1016 for (const Variable *Item : Handled) {
1017 dumpLiveRange(Item, Func);
1020 Str << "++++++ Unhandled:\n";
// Unhandled is consumed from the back in scan(), so iterate in reverse to
// print ranges in the order they will be processed.
1021 for (const Variable *Item : reverse_range(Unhandled)) {
1022 dumpLiveRange(Item, Func);
1025 Str << "++++++ Active:\n";
1026 for (const Variable *Item : Active) {
1027 dumpLiveRange(Item, Func);
1030 Str << "++++++ Inactive:\n";
1031 for (const Variable *Item : Inactive) {
1032 dumpLiveRange(Item, Func);
1037 } // end of namespace Ice