//===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation ----===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the CfgNode class, including the complexities of
/// instruction insertion and in-edge calculation.
///
//===----------------------------------------------------------------------===//

#include "IceCfgNode.h"

#include "IceAssembler.h"
#include "IceCfg.h"
#include "IceGlobalInits.h"
#include "IceInst.h"
#include "IceInstVarIter.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IceTargetLowering.h"

namespace Ice {
CfgNode::CfgNode(Cfg *Func, SizeT LabelNumber)
    : Func(Func), Number(LabelNumber), LabelNumber(LabelNumber) {}
// Returns the name the node was created with. If no name was given, it
// synthesizes a (hopefully) unique name.
IceString CfgNode::getName() const {
  if (NameIndex >= 0)
    return Func->getIdentifierName(NameIndex);
  return "__" + std::to_string(LabelNumber);
}
// Adds an instruction to either the Phi list or the regular instruction list.
// Validates that all Phis are added before all regular instructions.
void CfgNode::appendInst(Inst *Instr) {
  ++InstCountEstimate;
  if (auto *Phi = llvm::dyn_cast<InstPhi>(Instr)) {
    if (!Insts.empty()) {
      Func->setError("Phi instruction added to the middle of a block");
      return;
    }
    Phis.push_back(Phi);
  } else {
    Insts.push_back(Instr);
  }
}
// Renumbers the non-deleted instructions in the node. This needs to be done in
// preparation for live range analysis. The instruction numbers in a block must
// be monotonically increasing. The range of instruction numbers in a block,
// from lowest to highest, must not overlap with the range of any other block.
void CfgNode::renumberInstructions() {
  InstNumberT FirstNumber = Func->getNextInstNumber();
  for (Inst &I : Phis)
    I.renumber(Func);
  for (Inst &I : Insts)
    I.renumber(Func);
  InstCountEstimate = Func->getNextInstNumber() - FirstNumber;
}
// When a node is created, the OutEdges are immediately known, but the InEdges
// have to be built up incrementally. After the CFG has been constructed, the
// computePredecessors() pass finalizes it by creating the InEdges list.
void CfgNode::computePredecessors() {
  for (CfgNode *Succ : OutEdges)
    Succ->InEdges.push_back(this);
}
void CfgNode::computeSuccessors() {
  OutEdges = Insts.rbegin()->getTerminatorEdges();
}
// Validate each Phi instruction in the node with respect to control flow. For
// every phi argument, its label must appear in the predecessor list. For each
// predecessor, there must be a phi argument with that label. We don't check
// that phi arguments with the same label have the same value.
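//
// An illustrative example (not taken from any particular test): if a node has
// in-edges from %bb1 and %bb2, then
//   %x = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]  ; valid
//   %x = phi i32 [ %a, %bb1 ], [ %b, %bb3 ]  ; invalid: %bb3 is not a pred
//   %x = phi i32 [ %a, %bb1 ]                ; invalid: no argument for %bb2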
void CfgNode::validatePhis() {
  for (Inst &Instr : Phis) {
    auto *Phi = llvm::cast<InstPhi>(&Instr);
    // We do a simple O(N^2) algorithm to check for consistency. Even so, it
    // shows up as only about 0.2% of the total translation time. But if
    // necessary, we could improve the complexity by using a hash table to
    // count how many times each node is referenced in the Phi instruction, and
    // how many times each node is referenced in the incoming edge list, and
    // compare the two for equality.
    for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
      CfgNode *Label = Phi->getLabel(i);
      bool Found = false;
      for (CfgNode *InNode : getInEdges()) {
        if (InNode == Label) {
          Found = true;
          break;
        }
      }
      if (!Found)
        llvm::report_fatal_error("Phi error: label for bad incoming edge");
    }
    for (CfgNode *InNode : getInEdges()) {
      bool Found = false;
      for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
        CfgNode *Label = Phi->getLabel(i);
        if (InNode == Label) {
          Found = true;
          break;
        }
      }
      if (!Found)
        llvm::report_fatal_error("Phi error: missing label for incoming edge");
    }
  }
}
// This does part 1 of Phi lowering, by creating a new dest variable for each
// Phi instruction, replacing the Phi instruction's dest with that variable,
// and adding an explicit assignment of the old dest to the new dest. For
// example,
//   a=phi(...)
// changes to
//   "a_phi=phi(...); a=a_phi".
//
// This is in preparation for part 2 which deletes the Phi instructions and
// appends assignment instructions to predecessor blocks. Note that this
// transformation preserves SSA form.
void CfgNode::placePhiLoads() {
  for (Inst &I : Phis) {
    auto *Phi = llvm::dyn_cast<InstPhi>(&I);
    Insts.insert(Insts.begin(), Phi->lower(Func));
  }
}
// This does part 2 of Phi lowering. For each Phi instruction at each out-edge,
// create a corresponding assignment instruction, and add all the assignments
// near the end of this block. They need to be added before any branch
// instruction, and also if the block ends with a compare instruction followed
// by a branch instruction that we may want to fuse, it's better to insert the
// new assignments before the compare instruction. The
// tryOptimizedCmpxchgCmpBr() method assumes this ordering of instructions.
//
// Note that this transformation takes the Phi dest variables out of SSA form,
// as there may be assignments to the dest variable in multiple blocks.
void CfgNode::placePhiStores() {
  // Find the insertion point.
  InstList::iterator InsertionPoint = Insts.end();
  // Every block must end in a terminator instruction, and therefore must have
  // at least one instruction, so it's valid to decrement InsertionPoint (but
  // assert just in case).
  assert(InsertionPoint != Insts.begin());
  --InsertionPoint;
  // Confirm that InsertionPoint is a terminator instruction. Calling
  // getTerminatorEdges() on a non-terminator instruction will cause an
  // llvm_unreachable().
  (void)InsertionPoint->getTerminatorEdges();
  // SafeInsertionPoint is always immediately before the terminator
  // instruction. If the block ends in a compare and conditional branch, it's
  // better to place the Phi store before the compare so as not to interfere
  // with compare/branch fusing. However, if the compare instruction's dest
  // operand is the same as the new assignment statement's source operand, this
  // can't be done due to data dependences, so we need to fall back to the
  // SafeInsertionPoint. To illustrate:
  //   ; <label>:95
  //   %97 = load i8* %96, align 1
  //   %98 = icmp ne i8 %97, 0
  //   br i1 %98, label %99, label %2132
  //   ; <label>:99
  //   %100 = phi i8 [ %97, %95 ], [ %110, %108 ]
  //   %101 = phi i1 [ %98, %95 ], [ %111, %108 ]
  // would be Phi-lowered as:
  //   ; <label>:95
  //   %97 = load i8* %96, align 1
  //   %100_phi = %97 ; can be at InsertionPoint
  //   %98 = icmp ne i8 %97, 0
  //   %101_phi = %98 ; must be at SafeInsertionPoint
  //   br i1 %98, label %99, label %2132
  //
  // TODO(stichnot): It may be possible to bypass this whole SafeInsertionPoint
  // mechanism. If a source basic block ends in a conditional branch:
  //   labelSource:
  //   br i1 %foo, label %labelTrue, label %labelFalse
  // and a branch target has a Phi involving the branch operand:
  //   labelTrue:
  //   %bar = phi i1 [ %foo, %labelSource ], ...
  // then we actually know the constant i1 value of the Phi operand:
  //   %bar = phi i1 [ true, %labelSource ], ...
  // It seems that this optimization should be done by clang or opt, but we
  // could also do it here.
  InstList::iterator SafeInsertionPoint = InsertionPoint;
  // Keep track of the dest variable of a compare instruction, so that we
  // insert the new instruction at the SafeInsertionPoint if the compare's dest
  // matches the Phi-lowered assignment's source.
  Variable *CmpInstDest = nullptr;
  // If the current insertion point is at a conditional branch instruction, and
  // the previous instruction is a compare instruction, then we move the
  // insertion point before the compare instruction so as not to interfere with
  // compare/branch fusing.
  if (auto *Branch = llvm::dyn_cast<InstBr>(InsertionPoint)) {
    if (!Branch->isUnconditional()) {
      if (InsertionPoint != Insts.begin()) {
        --InsertionPoint;
        if (llvm::isa<InstIcmp>(InsertionPoint) ||
            llvm::isa<InstFcmp>(InsertionPoint)) {
          CmpInstDest = InsertionPoint->getDest();
        } else {
          ++InsertionPoint;
        }
      }
    }
  }

  // Consider every out-edge.
  for (CfgNode *Succ : OutEdges) {
    // Consider every Phi instruction at the out-edge.
    for (Inst &I : Succ->Phis) {
      auto *Phi = llvm::dyn_cast<InstPhi>(&I);
      Operand *Operand = Phi->getOperandForTarget(this);
      assert(Operand);
      Variable *Dest = I.getDest();
      assert(Dest);
      InstAssign *NewInst = InstAssign::create(Func, Dest, Operand);
      if (CmpInstDest == Operand)
        Insts.insert(SafeInsertionPoint, NewInst);
      else
        Insts.insert(InsertionPoint, NewInst);
    }
  }
}
// Deletes the phi instructions after the loads and stores are placed.
void CfgNode::deletePhis() {
  for (Inst &I : Phis)
    I.setDeleted();
}
// Splits the edge from Pred to this node by creating a new node and hooking up
// the in and out edges appropriately. (The EdgeIndex parameter is only used to
// make the new node's name unique when there are multiple edges between the
// same pair of nodes.) The new node's instruction list is initialized to the
// empty list, with no terminator instruction. There must not be multiple edges
// from Pred to this node, so all Inst::getTerminatorEdges implementations must
// not contain duplicates.
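//
// For example (illustrative), splitting the edge Pred->this turns
//   Pred -> this
// into
//   Pred -> NewNode -> this
// where NewNode will later receive the Phi-lowered assignments and an
// unconditional branch back to this node.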
CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) {
  CfgNode *NewNode = Func->makeNode();
  // Depth is the minimum as it works if both are the same, but if one is
  // outside the loop and the other is inside, the new node should be placed
  // outside and not be executed multiple times within the loop.
  NewNode->setLoopNestDepth(
      std::min(getLoopNestDepth(), Pred->getLoopNestDepth()));
  if (BuildDefs::dump())
    NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" +
                     std::to_string(EdgeIndex));
  // The new node is added to the end of the node list, and will later need to
  // be sorted into a reasonable topological order.
  NewNode->setNeedsPlacement(true);
  // Repoint Pred's out-edge.
  bool Found = false;
  for (CfgNode *&I : Pred->OutEdges) {
    if (I == this) {
      I = NewNode;
      NewNode->InEdges.push_back(Pred);
      Found = true;
      break;
    }
  }
  assert(Found);
  // Repoint this node's in-edge.
  Found = false;
  for (CfgNode *&I : InEdges) {
    if (I == Pred) {
      I = NewNode;
      NewNode->OutEdges.push_back(this);
      Found = true;
      break;
    }
  }
  assert(Found);
  // Repoint all suitable branch instructions' target and return.
  Found = false;
  for (Inst &I : Pred->getInsts())
    if (!I.isDeleted() && I.repointEdges(this, NewNode))
      Found = true;
  assert(Found);
  return NewNode;
}
namespace {

// Helper function used by advancedPhiLowering().
bool sameVarOrReg(TargetLowering *Target, const Variable *Var1,
                  const Operand *Opnd) {
  if (Var1 == Opnd)
    return true;
  const auto *Var2 = llvm::dyn_cast<Variable>(Opnd);
  if (Var2 == nullptr)
    return false;

  // If either operand lacks a register, they cannot be the same.
  if (!Var1->hasReg() || !Var2->hasReg())
    return false;

  int32_t RegNum1 = Var1->getRegNum();
  int32_t RegNum2 = Var2->getRegNum();
  // Quick common-case check.
  if (RegNum1 == RegNum2)
    return true;

  assert(Target->getAliasesForRegister(RegNum1)[RegNum2] ==
         Target->getAliasesForRegister(RegNum2)[RegNum1]);
  return Target->getAliasesForRegister(RegNum1)[RegNum2];
}
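
// A sketch of the aliasing case, assuming an x86-32 target: a variable
// assigned to eax and another assigned to ax occupy overlapping storage, so
// the getAliasesForRegister(eax) bitvector has the bit for ax set, and
// sameVarOrReg() reports the two as "the same" even though the Variables
// (and register numbers) differ.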
} // end of anonymous namespace
// This is the "advanced" version of Phi lowering for a basic block, in
// contrast to the simple version that lowers through assignments involving
// temporaries.
//
// All Phi instructions in a basic block are conceptually executed in parallel.
// However, if we lower Phis early and commit to a sequential ordering, we may
// end up creating unnecessary interferences which lead to worse register
// allocation. Delaying Phi scheduling until after register allocation can help
// unless there are no free registers for shuffling registers or stack slots
// and spilling becomes necessary.
//
// The advanced Phi lowering starts by finding a topological sort of the Phi
// instructions, where "A=B" comes before "B=C" due to the anti-dependence on
// B. Preexisting register assignments are considered in the topological sort.
// If a topological sort is not possible due to a cycle, the cycle is broken by
// introducing a non-parallel temporary. For example, a cycle arising from a
// permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being
// equal, prefer to schedule assignments with register-allocated Src operands
// earlier, in case that register becomes free afterwards, and prefer to
// schedule assignments with register-allocated Dest variables later, to keep
// that register free for longer.
//
// Once the ordering is determined, the Cfg edge is split and the assignment
// list is lowered by the target lowering layer. Since the assignment lowering
// may create new infinite-weight temporaries, a follow-on register allocation
// pass will be needed. To prepare for this, liveness (including live range
// calculation) of the split nodes needs to be calculated, and liveness of the
// original node needs to be updated to "undo" the effects of the phi
// assignments.
//
// The specific placement of the new node within the Cfg node list is deferred
// until later, including after empty node contraction.
//
// After phi assignments are lowered across all blocks, another register
// allocation pass is run, focusing only on pre-colored and infinite-weight
// variables, similar to Om1 register allocation (except without the need to
// specially compute these variables' live ranges, since they have already been
// precisely calculated). The register allocator in this mode needs the ability
// to forcibly spill and reload registers in case none are naturally available.
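//
// A small worked example of the NumPred bookkeeping below (hypothetical
// moves, not from any particular test). Suppose one in-edge requires the
// parallel moves "A=B" and "B=C":
//   - "A=B" (Dest=A): no other move reads A, so NumPred=0 and its Weight
//     gets the WeightNoPreds credit; it is scheduled first.
//   - "B=C" (Dest=B): "A=B" still reads B, so NumPred=1 and it must wait.
// After "A=B" is emitted, B is no longer needed as a source: NumPred for
// "B=C" drops to 0, its Weight rises by WeightNoPreds, and it is emitted
// next, giving the sequential order "A=B; B=C", which preserves the
// parallel-move semantics.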
void CfgNode::advancedPhiLowering() {
  if (getPhis().empty())
    return;

  // Count the number of non-deleted Phi instructions.
  struct PhiDesc {
    InstPhi *Phi;
    Variable *Dest;
    Operand *Src;
    bool Processed;
    size_t NumPred; // number of entries whose Src is this Dest
    int32_t Weight; // preference for topological order
  };
  llvm::SmallVector<PhiDesc, 32> Desc(getPhis().size());

  size_t NumPhis = 0;
  for (Inst &I : Phis) {
    auto *Instr = llvm::dyn_cast<InstPhi>(&I);
    if (!Instr->isDeleted()) {
      Variable *Dest = Instr->getDest();
      Desc[NumPhis].Phi = Instr;
      Desc[NumPhis].Dest = Dest;
      ++NumPhis;
      // Undo the effect of the phi instruction on this node's live-in set by
      // marking the phi dest variable as live on entry.
      SizeT VarNum = Func->getLiveness()->getLiveIndex(Dest->getIndex());
      assert(!Func->getLiveness()->getLiveIn(this)[VarNum]);
      Func->getLiveness()->getLiveIn(this)[VarNum] = true;
      Instr->setDeleted();
    }
  }
  if (NumPhis == 0)
    return;

  TargetLowering *Target = Func->getTarget();
  SizeT InEdgeIndex = 0;
  for (CfgNode *Pred : InEdges) {
    CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++);
    SizeT Remaining = NumPhis;

    // First pass computes Src and initializes NumPred.
    for (size_t I = 0; I < NumPhis; ++I) {
      Variable *Dest = Desc[I].Dest;
      Operand *Src = Desc[I].Phi->getOperandForTarget(Pred);
      Desc[I].Src = Src;
      Desc[I].Processed = false;
      Desc[I].NumPred = 0;
      // Cherry-pick any trivial assignments, so that they don't contribute to
      // the running complexity of the topological sort.
      if (sameVarOrReg(Target, Dest, Src)) {
        Desc[I].Processed = true;
        --Remaining;
        if (Dest != Src) {
          // If Dest and Src are syntactically the same, don't bother adding
          // the assignment, because in all respects it would be redundant, and
          // if Dest/Src are on the stack, the target lowering may naively
          // decide to lower it using a temporary register.
          Split->appendInst(InstAssign::create(Func, Dest, Src));
        }
      }
    }
    // Second pass computes NumPred by comparing every pair of Phi
    // instructions.
    for (size_t I = 0; I < NumPhis; ++I) {
      if (Desc[I].Processed)
        continue;
      const Variable *Dest = Desc[I].Dest;
      for (size_t J = 0; J < NumPhis; ++J) {
        if (Desc[J].Processed)
          continue;
        if (I != J) {
          // There shouldn't be two Phis with the same Dest variable or
          // register.
          assert(!sameVarOrReg(Target, Dest, Desc[J].Dest));
        }
        const Operand *Src = Desc[J].Src;
        if (sameVarOrReg(Target, Dest, Src))
          ++Desc[I].NumPred;
      }
    }

    // Another pass to compute initial Weight values.

    // Always pick NumPred=0 over NumPred>0.
    constexpr int32_t WeightNoPreds = 4;
    // Prefer Src as a register because the register might free up.
    constexpr int32_t WeightSrcIsReg = 2;
    // Prefer Dest not as a register because the register stays free longer.
    constexpr int32_t WeightDestNotReg = 1;

    for (size_t I = 0; I < NumPhis; ++I) {
      if (Desc[I].Processed)
        continue;
      int32_t Weight = 0;
      if (Desc[I].NumPred == 0)
        Weight += WeightNoPreds;
      if (auto *Var = llvm::dyn_cast<Variable>(Desc[I].Src))
        if (Var->hasReg())
          Weight += WeightSrcIsReg;
      if (!Desc[I].Dest->hasReg())
        Weight += WeightDestNotReg;
      Desc[I].Weight = Weight;
    }

    // Repeatedly choose and process the best candidate in the topological
    // sort, until no candidates remain. This implementation is O(N^2) where N
    // is the number of Phi instructions, but with a small constant factor
    // compared to a likely implementation of O(N) topological sort.
    for (; Remaining; --Remaining) {
      size_t BestIndex = 0;
      int32_t BestWeight = -1;
      // Find the best candidate.
      for (size_t I = 0; I < NumPhis; ++I) {
        if (Desc[I].Processed)
          continue;
        const int32_t Weight = Desc[I].Weight;
        if (Weight > BestWeight) {
          BestIndex = I;
          BestWeight = Weight;
        }
      }
      assert(BestWeight >= 0);
      assert(Desc[BestIndex].NumPred <= 1);
      Variable *Dest = Desc[BestIndex].Dest;
      Operand *Src = Desc[BestIndex].Src;
      assert(!sameVarOrReg(Target, Dest, Src));
      // Break a cycle by introducing a temporary.
      if (Desc[BestIndex].NumPred) {
        bool Found = false;
        // If the target instruction "A=B" is part of a cycle, find the "X=A"
        // assignment in the cycle because it will have to be rewritten as
        // "X=tmp".
        for (size_t J = 0; !Found && J < NumPhis; ++J) {
          if (Desc[J].Processed)
            continue;
          Operand *OtherSrc = Desc[J].Src;
          if (Desc[J].NumPred && sameVarOrReg(Target, Dest, OtherSrc)) {
            SizeT VarNum = Func->getNumVariables();
            Variable *Tmp = Func->makeVariable(OtherSrc->getType());
            if (BuildDefs::dump())
              Tmp->setName(Func, "__split_" + std::to_string(VarNum));
            Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc));
            Desc[J].Src = Tmp;
            Found = true;
          }
        }
        assert(Found);
      }
      // Now that a cycle (if any) has been broken, create the actual
      // assignment.
      Split->appendInst(InstAssign::create(Func, Dest, Src));
      // Update NumPred for all Phi assignments using this Phi's Src as their
      // Dest variable. Also update Weight if NumPred dropped from 1 to 0.
      if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
        for (size_t I = 0; I < NumPhis; ++I) {
          if (Desc[I].Processed)
            continue;
          if (sameVarOrReg(Target, Var, Desc[I].Dest)) {
            if (--Desc[I].NumPred == 0)
              Desc[I].Weight += WeightNoPreds;
          }
        }
      }
      Desc[BestIndex].Processed = true;
    }
    Split->appendInst(InstBr::create(Func, this));

    Split->genCode();

    Func->getVMetadata()->addNode(Split);
  }
}
// Does address mode optimization. Pass each instruction to the TargetLowering
// object. If it returns a new instruction (representing the optimized address
// mode), then insert the new instruction and delete the old.
void CfgNode::doAddressOpt() {
  TargetLowering *Target = Func->getTarget();
  LoweringContext &Context = Target->getContext();
  Context.init(this);
  while (!Context.atEnd()) {
    Target->doAddressOpt();
  }
}
void CfgNode::doNopInsertion(RandomNumberGenerator &RNG) {
  TargetLowering *Target = Func->getTarget();
  LoweringContext &Context = Target->getContext();
  Context.init(this);
  Context.setInsertPoint(Context.getCur());
  // Do not insert nop in bundle locked instructions.
  bool PauseNopInsertion = false;
  while (!Context.atEnd()) {
    if (llvm::isa<InstBundleLock>(Context.getCur())) {
      PauseNopInsertion = true;
    } else if (llvm::isa<InstBundleUnlock>(Context.getCur())) {
      PauseNopInsertion = false;
    }
    if (!PauseNopInsertion)
      Target->doNopInsertion(RNG);
    // Ensure Cur=Next, so that the nops are inserted before the current
    // instruction rather than after.
    Context.advanceCur();
    Context.advanceNext();
  }
}
// Drives the target lowering. Passes the current instruction and the next
// non-deleted instruction for target lowering.
void CfgNode::genCode() {
  TargetLowering *Target = Func->getTarget();
  LoweringContext &Context = Target->getContext();
  // Lower the regular instructions.
  Context.init(this);
  Target->initNodeForLowering(this);
  while (!Context.atEnd()) {
    InstList::iterator Orig = Context.getCur();
    if (llvm::isa<InstRet>(*Orig))
      setHasReturn();
    Target->lower();
    // Ensure target lowering actually moved the cursor.
    assert(Context.getCur() != Orig);
  }
  Context.availabilityReset();
  // Do preliminary lowering of the Phi instructions.
  Target->prelowerPhis();
}
void CfgNode::livenessLightweight() {
  SizeT NumVars = Func->getNumVariables();
  LivenessBV Live(NumVars);
  // Process regular instructions in reverse order.
  for (Inst &I : reverse_range(Insts)) {
    if (I.isDeleted())
      continue;
    I.livenessLightweight(Func, Live);
  }
  for (Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    I.livenessLightweight(Func, Live);
  }
}
// Performs liveness analysis on the block. Returns true if the incoming
// liveness changed from before, false if it stayed the same. (If it changes,
// the node's predecessors need to be processed again.)
bool CfgNode::liveness(Liveness *Liveness) {
  SizeT NumVars = Liveness->getNumVarsInNode(this);
  LivenessBV Live(NumVars);
  LiveBeginEndMap *LiveBegin = nullptr;
  LiveBeginEndMap *LiveEnd = nullptr;
  // Mark the beginning and ending of each variable's live range with the
  // sentinel instruction number 0.
  if (Liveness->getMode() == Liveness_Intervals) {
    LiveBegin = Liveness->getLiveBegin(this);
    LiveEnd = Liveness->getLiveEnd(this);
    LiveBegin->clear();
    LiveEnd->clear();
    // Guess that the number of live ranges beginning is roughly the number of
    // instructions, and same for live ranges ending.
    LiveBegin->reserve(getInstCountEstimate());
    LiveEnd->reserve(getInstCountEstimate());
  }
  // Initialize Live to be the union of all successors' LiveIn.
  for (CfgNode *Succ : OutEdges) {
    Live |= Liveness->getLiveIn(Succ);
    // Mark corresponding argument of phis in successor as live.
    for (Inst &I : Succ->Phis) {
      if (I.isDeleted())
        continue;
      auto *Phi = llvm::dyn_cast<InstPhi>(&I);
      Phi->livenessPhiOperand(Live, this, Liveness);
    }
  }
  Liveness->getLiveOut(this) = Live;

  // Process regular instructions in reverse order.
  for (Inst &I : reverse_range(Insts)) {
    if (I.isDeleted())
      continue;
    I.liveness(I.getNumber(), Live, Liveness, LiveBegin, LiveEnd);
  }
  // Process phis in forward order so that we can override the instruction
  // number to be that of the earliest phi instruction in the block.
  SizeT NumNonDeadPhis = 0;
  InstNumberT FirstPhiNumber = Inst::NumberSentinel;
  for (Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    if (FirstPhiNumber == Inst::NumberSentinel)
      FirstPhiNumber = I.getNumber();
    if (I.liveness(FirstPhiNumber, Live, Liveness, LiveBegin, LiveEnd))
      ++NumNonDeadPhis;
  }

  // When using the sparse representation, after traversing the instructions in
  // the block, the Live bitvector should only contain set bits for global
  // variables upon block entry. We validate this by shrinking the Live vector
  // and then testing it against the pre-shrunk version. (The shrinking is
  // required, but the validation is not.)
  LivenessBV LiveOrig = Live;
  Live.resize(Liveness->getNumGlobalVars());
  if (Live != LiveOrig) {
    if (BuildDefs::dump()) {
      // This is a fatal liveness consistency error. Print some diagnostics and
      // abort.
      Ostream &Str = Func->getContext()->getStrDump();
      Func->resetCurrentNode();
      Str << "LiveOrig-Live =";
      for (SizeT i = Live.size(); i < LiveOrig.size(); ++i) {
        if (LiveOrig.test(i)) {
          Str << " ";
          Liveness->getVariable(i, this)->dump(Func);
        }
      }
      Str << "\n";
    }
    llvm::report_fatal_error("Fatal inconsistency in liveness analysis");
  }

  bool Changed = false;
  LivenessBV &LiveIn = Liveness->getLiveIn(this);
  // Add in current LiveIn
  Live |= LiveIn;
  // Check result, set LiveIn=Live
  SizeT &PrevNumNonDeadPhis = Liveness->getNumNonDeadPhis(this);
  bool LiveInChanged = (Live != LiveIn);
  Changed = (NumNonDeadPhis != PrevNumNonDeadPhis || LiveInChanged);
  if (LiveInChanged)
    LiveIn = Live;
  PrevNumNonDeadPhis = NumNonDeadPhis;
  return Changed;
}
// Validate the integrity of the live ranges in this block. If there are any
// errors, it prints details and returns false. On success, it returns true.
bool CfgNode::livenessValidateIntervals(Liveness *Liveness) const {
  if (!BuildDefs::asserts())
    return true;

  // Verify there are no duplicates.
  auto ComparePair =
      [](const LiveBeginEndMapEntry &A, const LiveBeginEndMapEntry &B) {
        return A.first == B.first;
      };
  LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this);
  LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this);
  if (std::adjacent_find(MapBegin.begin(), MapBegin.end(), ComparePair) ==
          MapBegin.end() &&
      std::adjacent_find(MapEnd.begin(), MapEnd.end(), ComparePair) ==
          MapEnd.end())
    return true;

  // There is definitely a liveness error. All paths from here return false.
  if (!BuildDefs::dump())
    return false;

  // Print all the errors.
  if (BuildDefs::dump()) {
    GlobalContext *Ctx = Func->getContext();
    OstreamLocker L(Ctx);
    Ostream &Str = Ctx->getStrDump();
    if (Func->isVerbose()) {
      Str << "Live range errors in the following block:\n";
      dump(Func);
    }
    for (auto Start = MapBegin.begin();
         (Start = std::adjacent_find(Start, MapBegin.end(), ComparePair)) !=
         MapBegin.end();
         ++Start) {
      auto Next = Start + 1;
      Str << "Duplicate LR begin, block " << getName() << ", instructions "
          << Start->second << " & " << Next->second << ", variable "
          << Liveness->getVariable(Start->first, this)->getName(Func) << "\n";
    }
    for (auto Start = MapEnd.begin();
         (Start = std::adjacent_find(Start, MapEnd.end(), ComparePair)) !=
         MapEnd.end();
         ++Start) {
      auto Next = Start + 1;
      Str << "Duplicate LR end, block " << getName() << ", instructions "
          << Start->second << " & " << Next->second << ", variable "
          << Liveness->getVariable(Start->first, this)->getName(Func) << "\n";
    }
  }

  return false;
}
// Once basic liveness is complete, compute actual live ranges. It is assumed
// that within a single basic block, a live range begins at most once and ends
// at most once. This is certainly true for pure SSA form. It is also true once
// phis are lowered, since each assignment to the phi-based temporary is in a
// different basic block, and there is a single read that ends the live range
// in the basic block that contained the actual phi instruction.
void CfgNode::livenessAddIntervals(Liveness *Liveness, InstNumberT FirstInstNum,
                                   InstNumberT LastInstNum) {
  TimerMarker T1(TimerStack::TT_liveRange, Func);

  SizeT NumVars = Liveness->getNumVarsInNode(this);
  LivenessBV &LiveIn = Liveness->getLiveIn(this);
  LivenessBV &LiveOut = Liveness->getLiveOut(this);
  LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this);
  LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this);
  std::sort(MapBegin.begin(), MapBegin.end());
  std::sort(MapEnd.begin(), MapEnd.end());

  if (!livenessValidateIntervals(Liveness)) {
    llvm::report_fatal_error("livenessAddIntervals: Liveness error");
  }

  LivenessBV LiveInAndOut = LiveIn;
  LiveInAndOut &= LiveOut;

  // Iterate in parallel across the sorted MapBegin[] and MapEnd[].
  auto IBB = MapBegin.begin(), IEB = MapEnd.begin();
  auto IBE = MapBegin.end(), IEE = MapEnd.end();
  while (IBB != IBE || IEB != IEE) {
    SizeT i1 = IBB == IBE ? NumVars : IBB->first;
    SizeT i2 = IEB == IEE ? NumVars : IEB->first;
    SizeT i = std::min(i1, i2);
    // i1 is the Variable number of the next MapBegin entry, and i2 is the
    // Variable number of the next MapEnd entry. If i1==i2, then the Variable's
    // live range begins and ends in this block. If i1<i2, then i1's live range
    // begins at instruction IBB->second and extends through the end of the
    // block. If i1>i2, then i2's live range begins at the first instruction of
    // the block and ends at IEB->second. In any case, we choose the lesser of
    // i1 and i2 and proceed accordingly.
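    //
    // A small worked example (hypothetical numbers): with FirstInstNum=10,
    // LastInstNum=20, a MapBegin entry (5, 12), and a MapEnd entry (5, 16),
    // we get i1==i2==5, LB=12, LE=16, and variable 5 receives the live range
    // [12, 16). If instead only the MapEnd entry (5, 16) existed, variable 5
    // would be live-in and receive [10, 16).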
    InstNumberT LB = i == i1 ? IBB->second : FirstInstNum;
    InstNumberT LE = i == i2 ? IEB->second : LastInstNum + 1;

    Variable *Var = Liveness->getVariable(i, this);
    if (LB > LE) {
      Var->addLiveRange(FirstInstNum, LE);
      Var->addLiveRange(LB, LastInstNum + 1);
      // Assert that Var is a global variable by checking that its liveness
      // index is less than the number of globals. This ensures that the
      // LiveInAndOut[] access is valid.
      assert(i < Liveness->getNumGlobalVars());
      LiveInAndOut[i] = false;
    } else {
      Var->addLiveRange(LB, LE);
    }
    if (i == i1)
      ++IBB;
    if (i == i2)
      ++IEB;
  }
  // Process the variables that are live across the entire block.
  for (int i = LiveInAndOut.find_first(); i != -1;
       i = LiveInAndOut.find_next(i)) {
    Variable *Var = Liveness->getVariable(i, this);
    if (Liveness->getRangeMask(Var->getIndex()))
      Var->addLiveRange(FirstInstNum, LastInstNum + 1);
  }
}
// If this node contains only deleted instructions, and ends in an
// unconditional branch, contract the node by repointing all its in-edges to
// its successor.
void CfgNode::contractIfEmpty() {
  if (InEdges.empty())
    return;
  Inst *Branch = nullptr;
  for (Inst &I : Insts) {
    if (I.isDeleted())
      continue;
    if (I.isUnconditionalBranch())
      Branch = &I;
    else if (!I.isRedundantAssign())
      return;
  }
  assert(OutEdges.size() == 1);
  // Don't try to delete a self-loop.
  if (OutEdges[0] == this)
    return;

  Branch->setDeleted();
  CfgNode *Successor = OutEdges.front();
  // Repoint all this node's in-edges to this node's successor, unless this
  // node's successor is actually itself (in which case the statement
  // "OutEdges.front()->InEdges.push_back(Pred)" could invalidate the iterator
  // over this->InEdges).
  if (Successor != this) {
    for (CfgNode *Pred : InEdges) {
      for (CfgNode *&I : Pred->OutEdges) {
        if (I == this) {
          I = Successor;
        }
      }
      Successor->InEdges.push_back(Pred);
      for (Inst &I : Pred->getInsts()) {
        if (!I.isDeleted())
          I.repointEdges(this, Successor);
      }
    }

    // Remove the in-edge to the successor to allow node reordering to make
    // better decisions. For example it's more helpful to place a node after a
    // reachable predecessor than an unreachable one (like the one we just
    // contracted).
    Successor->InEdges.erase(
        std::find(Successor->InEdges.begin(), Successor->InEdges.end(), this));
  }
  InEdges.clear();
}
void CfgNode::doBranchOpt(const CfgNode *NextNode) {
  TargetLowering *Target = Func->getTarget();
  // Find the first opportunity for branch optimization (which will be the last
  // instruction in the block) and stop. This is sufficient unless there is
  // some target lowering where we have the possibility of multiple
  // optimizations per block. Take care with switch lowering as there are
  // multiple unconditional branches and only the last can be deleted.
  for (Inst &I : reverse_range(Insts)) {
    if (!I.isDeleted()) {
      Target->doBranchOpt(&I, NextNode);
      return;
    }
  }
}
// ======================== Dump routines ======================== //

namespace {

// Helper functions for emit().

void emitRegisterUsage(Ostream &Str, const Cfg *Func, const CfgNode *Node,
                       bool IsLiveIn, CfgVector<SizeT> &LiveRegCount) {
  if (!BuildDefs::dump())
    return;
  Liveness *Liveness = Func->getLiveness();
  const LivenessBV *Live;
  const int32_t StackReg = Func->getTarget()->getStackReg();
  const int32_t FrameOrStackReg = Func->getTarget()->getFrameOrStackReg();
  if (IsLiveIn) {
    Live = &Liveness->getLiveIn(Node);
    Str << "\t\t\t\t# LiveIn=";
  } else {
    Live = &Liveness->getLiveOut(Node);
    Str << "\t\t\t\t# LiveOut=";
  }
  if (!Live->empty()) {
    CfgVector<Variable *> LiveRegs;
    for (SizeT i = 0; i < Live->size(); ++i) {
      if (!(*Live)[i])
        continue;
      Variable *Var = Liveness->getVariable(i, Node);
      if (!Var->hasReg())
        continue;
      const int32_t RegNum = Var->getRegNum();
      if (RegNum == StackReg || RegNum == FrameOrStackReg)
        continue;
      ++LiveRegCount[RegNum];
      LiveRegs.push_back(Var);
    }
    // Sort the variables by regnum so they are always printed in a familiar
    // order.
    std::sort(LiveRegs.begin(), LiveRegs.end(),
              [](const Variable *V1, const Variable *V2) {
                return V1->getRegNum() < V2->getRegNum();
              });
    bool First = true;
    for (Variable *Var : LiveRegs) {
      if (!First)
        Str << ",";
      First = false;
      Var->emit(Func);
    }
  }
  Str << "\n";
}
void emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr,
                         CfgVector<SizeT> &LiveRegCount) {
  if (!BuildDefs::dump())
    return;
  bool First = true;
  Variable *Dest = Instr->getDest();
  // Normally we increment the live count for the dest register. But we
  // shouldn't if the instruction's IsDestRedefined flag is set, because this
  // means that the target lowering created this instruction as a non-SSA
  // assignment; i.e., a different, previous instruction started the dest
  // variable's live range.
  if (!Instr->isDestRedefined() && Dest && Dest->hasReg())
    ++LiveRegCount[Dest->getRegNum()];
  FOREACH_VAR_IN_INST(Var, *Instr) {
    bool ShouldReport = Instr->isLastUse(Var);
    if (ShouldReport && Var->hasReg()) {
      // Don't report end of live range until the live count reaches 0.
      SizeT NewCount = --LiveRegCount[Var->getRegNum()];
      if (NewCount)
        ShouldReport = false;
    }
    if (ShouldReport) {
      if (First)
        Str << " \t# END=";
      else
        Str << ",";
      Var->emit(Func);
      First = false;
    }
  }
}
void updateStats(Cfg *Func, const Inst *I) {
  if (!BuildDefs::dump())
    return;
  // Update emitted instruction count, plus fill/spill count for Variable
  // operands without a physical register.
  if (uint32_t Count = I->getEmitInstCount()) {
    Func->getContext()->statsUpdateEmitted(Count);
    if (Variable *Dest = I->getDest()) {
      if (!Dest->hasReg())
        Func->getContext()->statsUpdateFills();
    }
    for (SizeT S = 0; S < I->getSrcSize(); ++S) {
      if (Variable *Src = llvm::dyn_cast<Variable>(I->getSrc(S))) {
        if (!Src->hasReg())
          Func->getContext()->statsUpdateSpills();
      }
    }
  }
}

} // end of anonymous namespace
void CfgNode::emit(Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Func->setCurrentNode(this);
  Ostream &Str = Func->getContext()->getStrEmit();
  Liveness *Liveness = Func->getLiveness();
  const bool DecorateAsm =
      Liveness && Func->getContext()->getFlags().getDecorateAsm();
  Str << getAsmName() << ":\n";
  // LiveRegCount keeps track of the number of currently live variables that
  // each register is assigned to. Normally that would be only 0 or 1, but the
  // register allocator's AllowOverlap inference allows it to be greater than 1
  // for short periods.
  CfgVector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters());
  if (DecorateAsm) {
    constexpr bool IsLiveIn = true;
    emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
    if (getInEdges().size()) {
      Str << "\t\t\t\t# preds=";
      bool First = true;
      for (CfgNode *I : getInEdges()) {
        if (!First)
          Str << ",";
        First = false;
        Str << "$" << I->getName();
      }
      Str << "\n";
    }
    if (getLoopNestDepth()) {
      Str << "\t\t\t\t# loop depth=" << getLoopNestDepth() << "\n";
    }
  }

  for (const Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    // Emitting a Phi instruction should cause an error.
    I.emit(Func);
  }
  for (const Inst &I : Insts) {
    if (I.isDeleted())
      continue;
    if (I.isRedundantAssign()) {
      // Usually, redundant assignments end the live range of the src variable
      // and begin the live range of the dest variable, with no net effect on
      // the liveness of their register. However, if the register allocator
      // infers the AllowOverlap condition, then this may be a redundant
      // assignment that does not end the src variable's live range, in which
      // case the active variable count for that register needs to be bumped.
      // That normally would have happened as part of emitLiveRangesEnded(),
      // but that isn't called for redundant assignments.
      Variable *Dest = I.getDest();
      if (DecorateAsm && Dest->hasReg()) {
        ++LiveRegCount[Dest->getRegNum()];
        if (I.isLastUse(I.getSrc(0)))
          --LiveRegCount[llvm::cast<Variable>(I.getSrc(0))->getRegNum()];
      }
      continue;
    }
    I.emit(Func);
    if (DecorateAsm)
      emitLiveRangesEnded(Str, Func, &I, LiveRegCount);
    Str << "\n";
    updateStats(Func, &I);
  }
  if (DecorateAsm) {
    constexpr bool IsLiveIn = false;
    emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
  }
}
namespace {

// Helper class for emitIAS().
class BundleEmitHelper {
  BundleEmitHelper() = delete;
  BundleEmitHelper(const BundleEmitHelper &) = delete;
  BundleEmitHelper &operator=(const BundleEmitHelper &) = delete;

public:
  BundleEmitHelper(Assembler *Asm, TargetLowering *Target,
                   const InstList &Insts)
      : Asm(Asm), Target(Target), End(Insts.end()), BundleLockStart(End),
        BundleSize(1 << Asm->getBundleAlignLog2Bytes()),
        BundleMaskLo(BundleSize - 1), BundleMaskHi(~BundleMaskLo) {}
  // Check whether we're currently within a bundle_lock region.
  bool isInBundleLockRegion() const { return BundleLockStart != End; }
  // Check whether the current bundle_lock region has the align_to_end option.
  bool isAlignToEnd() const {
    assert(isInBundleLockRegion());
    return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() ==
           InstBundleLock::Opt_AlignToEnd;
  }
  // Check whether the entire bundle_lock region falls within the same bundle.
  bool isSameBundle() const {
    assert(isInBundleLockRegion());
    return SizeSnapshotPre == SizeSnapshotPost ||
           (SizeSnapshotPre & BundleMaskHi) ==
               ((SizeSnapshotPost - 1) & BundleMaskHi);
  }
  // Get the bundle alignment of the first instruction of the bundle_lock
  // region.
  intptr_t getPreAlignment() const {
    assert(isInBundleLockRegion());
    return SizeSnapshotPre & BundleMaskLo;
  }
  // Get the bundle alignment of the first instruction past the bundle_lock
  // region.
  intptr_t getPostAlignment() const {
    assert(isInBundleLockRegion());
    return SizeSnapshotPost & BundleMaskLo;
  }
  // Get the iterator pointing to the bundle_lock instruction, e.g. to roll
  // back the instruction iteration to that point.
  InstList::const_iterator getBundleLockStart() const {
    assert(isInBundleLockRegion());
    return BundleLockStart;
  }
  // Set up bookkeeping when the bundle_lock instruction is first processed.
  void enterBundleLock(InstList::const_iterator I) {
    assert(!isInBundleLockRegion());
    BundleLockStart = I;
    SizeSnapshotPre = Asm->getBufferSize();
    Asm->setPreliminary(true);
    Target->snapshotEmitState();
    assert(isInBundleLockRegion());
  }
  // Update bookkeeping when the bundle_unlock instruction is processed.
  void enterBundleUnlock() {
    assert(isInBundleLockRegion());
    SizeSnapshotPost = Asm->getBufferSize();
  }
  // Update bookkeeping when we are completely finished with the bundle_lock
  // region.
  void leaveBundleLockRegion() { BundleLockStart = End; }
  // Check whether the instruction sequence fits within the current bundle, and
  // if not, add nop padding to the end of the current bundle.
  void padToNextBundle() {
    assert(isInBundleLockRegion());
    if (!isSameBundle()) {
      intptr_t PadToNextBundle = BundleSize - getPreAlignment();
      Asm->padWithNop(PadToNextBundle);
      SizeSnapshotPre += PadToNextBundle;
      SizeSnapshotPost += PadToNextBundle;
      assert((Asm->getBufferSize() & BundleMaskLo) == 0);
      assert(Asm->getBufferSize() == SizeSnapshotPre);
    }
  }
  // If align_to_end is specified, add padding such that the instruction
  // sequence ends precisely at a bundle boundary.
  void padForAlignToEnd() {
    assert(isInBundleLockRegion());
    if (isAlignToEnd()) {
      if (intptr_t Offset = getPostAlignment()) {
        Asm->padWithNop(BundleSize - Offset);
        SizeSnapshotPre = Asm->getBufferSize();
      }
    }
  }
  // Update bookkeeping when rolling back for the second pass.
  void rollback() {
    assert(isInBundleLockRegion());
    Asm->setBufferSize(SizeSnapshotPre);
    Asm->setPreliminary(false);
    Target->rollbackEmitState();
  }

private:
  Assembler *const Asm;
  TargetLowering *const Target;
  // End is a sentinel value such that BundleLockStart==End implies that we are
  // not in a bundle_lock region.
  const InstList::const_iterator End;
  InstList::const_iterator BundleLockStart;
  const intptr_t BundleSize;
  // Masking with BundleMaskLo identifies an address's bundle offset.
  const intptr_t BundleMaskLo;
  // Masking with BundleMaskHi identifies an address's bundle.
  const intptr_t BundleMaskHi;
  intptr_t SizeSnapshotPre = 0;
  intptr_t SizeSnapshotPost = 0;
};

} // end of anonymous namespace
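
// To make the mask arithmetic above concrete (illustrative numbers): with
// getBundleAlignLog2Bytes() == 5, BundleSize is 32, BundleMaskLo is 0x1f, and
// BundleMaskHi is ~0x1f. A buffer offset of 0x64 then has bundle offset
// 0x64 & 0x1f == 4 and belongs to the bundle starting at 0x64 & ~0x1f == 0x60,
// so a locked sequence starting there has 32 - 4 = 28 bytes available before
// padToNextBundle() would need to insert nops.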
void CfgNode::emitIAS(Cfg *Func) const {
  Func->setCurrentNode(this);
  Assembler *Asm = Func->getAssembler<>();
  // TODO(stichnot): When sandboxing, defer binding the node label until just
  // before the first instruction is emitted, to reduce the chance that a
  // padding nop is a branch target.
  Asm->bindCfgNodeLabel(getIndex());
  for (const Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    // Emitting a Phi instruction should cause an error.
    I.emitIAS(Func);
  }

  // Do the simple emission if not sandboxed.
  if (!Func->getContext()->getFlags().getUseSandboxing()) {
    for (const Inst &I : Insts) {
      if (!I.isDeleted() && !I.isRedundantAssign()) {
        I.emitIAS(Func);
        updateStats(Func, &I);
      }
    }
    return;
  }

  // The remainder of the function handles emission with sandboxing. There are
  // explicit bundle_lock regions delimited by bundle_lock and bundle_unlock
  // instructions. All other instructions are treated as an implicit
  // one-instruction bundle_lock region. Emission is done twice for each
  // bundle_lock region. The first pass is a preliminary pass, after which we
  // can figure out what nop padding is needed, then roll back, and make the
  // final pass.
  //
  // Ideally, the first pass would be speculative and the second pass would
  // only be done if nop padding were needed, but the structure of the
  // integrated assembler makes it hard to roll back the state of label
  // bindings, label links, and relocation fixups. Instead, the first pass just
  // disables all mutation of that state.
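  //
  // Sketch of the two-pass flow for an explicit region (illustrative):
  //   bundle_lock      ; pass 1: snapshot the buffer size, enter
  //                    ; preliminary (no-mutation) mode
  //   inst1, inst2 ... ; pass 1: emit only to measure the encoded size
  //   bundle_unlock    ; roll back to the snapshot, pad with nops if the
  //                    ; sequence crossed a bundle boundary, then rewind to
  //                    ; the bundle_lock and re-emit the region for real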
  BundleEmitHelper Helper(Asm, Func->getTarget(), Insts);
  InstList::const_iterator End = Insts.end();
  // Retrying indicates that we had to roll back to the bundle_lock instruction
  // to apply padding before the bundle_lock sequence.
  bool Retrying = false;
  for (InstList::const_iterator I = Insts.begin(); I != End; ++I) {
    if (I->isDeleted() || I->isRedundantAssign())
      continue;

    if (llvm::isa<InstBundleLock>(I)) {
      // Set up the initial bundle_lock state. This should not happen while
      // retrying, because the retry rolls back to the instruction following
      // the bundle_lock instruction.
      assert(!Retrying);
      Helper.enterBundleLock(I);
      continue;
    }

    if (llvm::isa<InstBundleUnlock>(I)) {
      Helper.enterBundleUnlock();
      if (Retrying) {
        // Make sure all instructions are in the same bundle.
        assert(Helper.isSameBundle());
        // If align_to_end is specified, make sure the next instruction begins
        // the bundle.
        assert(!Helper.isAlignToEnd() || Helper.getPostAlignment() == 0);
        Helper.leaveBundleLockRegion();
        Retrying = false;
      } else {
        // This is the first pass, so roll back for the retry pass.
        Helper.rollback();
        // Pad to the next bundle if the instruction sequence crossed a bundle
        // boundary.
        Helper.padToNextBundle();
        // Insert additional padding to make AlignToEnd work.
        Helper.padForAlignToEnd();
        // Prepare for the retry pass after padding is done.
        Retrying = true;
        I = Helper.getBundleLockStart();
      }
      continue;
    }

    // I points to a non bundle_lock/bundle_unlock instruction.
    if (Helper.isInBundleLockRegion()) {
      I->emitIAS(Func);
      // Only update stats during the final pass.
      if (Retrying)
        updateStats(Func, I);
    } else {
      // Treat it as though there were an implicit bundle_lock and
      // bundle_unlock wrapping the instruction.
      Helper.enterBundleLock(I);
      I->emitIAS(Func);
      Helper.enterBundleUnlock();
      Helper.rollback();
      Helper.padToNextBundle();
      I->emitIAS(Func);
      updateStats(Func, I);
      Helper.leaveBundleLockRegion();
    }
  }

  // Don't allow bundle locking across basic blocks, to keep the backtracking
  // mechanism simple.
  assert(!Helper.isInBundleLockRegion());
}
void CfgNode::dump(Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Func->setCurrentNode(this);
  Ostream &Str = Func->getContext()->getStrDump();
  Liveness *Liveness = Func->getLiveness();
  if (Func->isVerbose(IceV_Instructions) || Func->isVerbose(IceV_Loop))
    Str << getName() << ":\n";
  // Dump the loop nest depth.
  if (Func->isVerbose(IceV_Loop))
    Str << " // LoopNestDepth = " << getLoopNestDepth() << "\n";
  // Dump list of predecessor nodes.
  if (Func->isVerbose(IceV_Preds) && !InEdges.empty()) {
    Str << "    // preds = ";
    bool First = true;
    for (CfgNode *I : InEdges) {
      if (!First)
        Str << ", ";
      First = false;
      Str << "%" << I->getName();
    }
    Str << "\n";
  }
  // Dump the live-in variables.
  LivenessBV LiveIn;
  if (Liveness)
    LiveIn = Liveness->getLiveIn(this);
  if (Func->isVerbose(IceV_Liveness) && !LiveIn.empty()) {
    Str << "    // LiveIn:";
    for (SizeT i = 0; i < LiveIn.size(); ++i) {
      if (LiveIn[i]) {
        Variable *Var = Liveness->getVariable(i, this);
        Str << " %" << Var->getName(Func);
        if (Func->isVerbose(IceV_RegOrigins) && Var->hasReg()) {
          Str << ":"
              << Func->getTarget()->getRegName(Var->getRegNum(),
                                               Var->getType());
        }
      }
    }
    Str << "\n";
  }
  // Dump each instruction.
  if (Func->isVerbose(IceV_Instructions)) {
    for (const Inst &I : Phis)
      I.dumpDecorated(Func);
    for (const Inst &I : Insts)
      I.dumpDecorated(Func);
  }
  // Dump the live-out variables.
  LivenessBV LiveOut;
  if (Liveness)
    LiveOut = Liveness->getLiveOut(this);
  if (Func->isVerbose(IceV_Liveness) && !LiveOut.empty()) {
    Str << "    // LiveOut:";
    for (SizeT i = 0; i < LiveOut.size(); ++i) {
      if (LiveOut[i]) {
        Variable *Var = Liveness->getVariable(i, this);
        Str << " %" << Var->getName(Func);
        if (Func->isVerbose(IceV_RegOrigins) && Var->hasReg()) {
          Str << ":"
              << Func->getTarget()->getRegName(Var->getRegNum(),
                                               Var->getType());
        }
      }
    }
    Str << "\n";
  }
  // Dump list of successor nodes.
  if (Func->isVerbose(IceV_Succs)) {
    Str << "    // succs = ";
    bool First = true;
    for (CfgNode *I : OutEdges) {
      if (!First)
        Str << ", ";
      First = false;
      Str << "%" << I->getName();
    }
    Str << "\n";
  }
}
void CfgNode::profileExecutionCount(VariableDeclaration *Var) {
  constexpr char RMW_I64[] = "llvm.nacl.atomic.rmw.i64";

  GlobalContext *Context = Func->getContext();

  bool BadIntrinsic = false;
  const Intrinsics::FullIntrinsicInfo *Info =
      Context->getIntrinsicsInfo().find(RMW_I64, BadIntrinsic);
  assert(!BadIntrinsic);
  assert(Info != nullptr);

  Operand *RMWI64Name = Context->getConstantExternSym(RMW_I64);
  constexpr RelocOffsetT Offset = 0;
  constexpr bool SuppressMangling = true;
  Constant *Counter =
      Context->getConstantSym(Offset, Var->getName(), SuppressMangling);
  Constant *AtomicRMWOp = Context->getConstantInt32(Intrinsics::AtomicAdd);
  Constant *One = Context->getConstantInt64(1);
  Constant *OrderAcquireRelease =
      Context->getConstantInt32(Intrinsics::MemoryOrderAcquireRelease);

  auto *Instr = InstIntrinsicCall::create(
      Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info);
  Instr->addArg(AtomicRMWOp);
  Instr->addArg(Counter);
  Instr->addArg(One);
  Instr->addArg(OrderAcquireRelease);
  Insts.push_front(Instr);
}

} // end of namespace Ice