1 //===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===//
3 // The Subzero Code Generator
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief Implements the CfgNode class, including the complexities of
12 /// instruction insertion and in-edge calculation.
14 //===----------------------------------------------------------------------===//
16 #include "IceCfgNode.h"
18 #include "IceAssembler.h"
20 #include "IceGlobalInits.h"
22 #include "IceInstVarIter.h"
23 #include "IceLiveness.h"
24 #include "IceOperand.h"
25 #include "IceTargetLowering.h"
29 // Adds an instruction to either the Phi list or the regular instruction list.
30 // Validates that all Phis are added before all regular instructions.
31 void CfgNode::appendInst(Inst *Instr) {
34 if (BuildDefs::wasm()) {
35 if (llvm::isa<InstSwitch>(Instr) || llvm::isa<InstBr>(Instr)) {
36 for (auto *N : Instr->getTerminatorEdges()) {
43 if (auto *Phi = llvm::dyn_cast<InstPhi>(Instr)) {
45 Func->setError("Phi instruction added to the middle of a block");
50 Insts.push_back(Instr);
55 template <typename List> void removeDeletedAndRenumber(List *L, Cfg *Func) {
57 BuildDefs::minimal() || !getFlags().getKeepDeletedInsts();
58 auto I = L->begin(), E = L->end(), Next = I;
59 for (++Next; I != E; I = Next++) {
60 if (DoDelete && I->isDeleted()) {
67 } // end of anonymous namespace
69 void CfgNode::renumberInstructions() {
70 InstNumberT FirstNumber = Func->getNextInstNumber();
71 removeDeletedAndRenumber(&Phis, Func);
72 removeDeletedAndRenumber(&Insts, Func);
73 InstCountEstimate = Func->getNextInstNumber() - FirstNumber;
76 // When a node is created, the OutEdges are immediately known, but the InEdges
77 // have to be built up incrementally. After the CFG has been constructed, the
78 // computePredecessors() pass finalizes it by creating the InEdges list.
79 void CfgNode::computePredecessors() {
80 for (CfgNode *Succ : OutEdges)
81 Succ->InEdges.push_back(this);
84 void CfgNode::computeSuccessors() {
87 assert(!Insts.empty());
88 OutEdges = Insts.rbegin()->getTerminatorEdges();
91 // Ensure each Phi instruction in the node is consistent with respect to control
92 // flow. For each predecessor, there must be a phi argument with that label.
93 // If a phi argument's label doesn't appear in the predecessor list (which can
94 // happen as a result of e.g. unreachable node elimination), its value is
95 // modified to be zero, to maintain consistency in liveness analysis. This
96 // allows us to remove some dead control flow without a major rework of the phi
97 // instructions. We don't check that phi arguments with the same label have the
99 void CfgNode::enforcePhiConsistency() {
100 for (Inst &Instr : Phis) {
101 auto *Phi = llvm::cast<InstPhi>(&Instr);
102 // We do a simple O(N^2) algorithm to check for consistency. Even so, it
103 // shows up as only about 0.2% of the total translation time. But if
104 // necessary, we could improve the complexity by using a hash table to
105 // count how many times each node is referenced in the Phi instruction, and
106 // how many times each node is referenced in the incoming edge list, and
107 // compare the two for equality.
108 for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
109 CfgNode *Label = Phi->getLabel(i);
111 for (CfgNode *InNode : getInEdges()) {
112 if (InNode == Label) {
118 // Predecessor was unreachable, so if (impossibly) the control flow
119 // enters from that predecessor, the value should be zero.
120 Phi->clearOperandForTarget(Label);
123 for (CfgNode *InNode : getInEdges()) {
125 for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
126 CfgNode *Label = Phi->getLabel(i);
127 if (InNode == Label) {
133 llvm::report_fatal_error("Phi error: missing label for incoming edge");
138 // This does part 1 of Phi lowering, by creating a new dest variable for each
139 // Phi instruction, replacing the Phi instruction's dest with that variable,
140 // and adding an explicit assignment of the old dest to the new dest. For
144 // "a_phi=phi(...); a=a_phi".
146 // This is in preparation for part 2 which deletes the Phi instructions and
147 // appends assignment instructions to predecessor blocks. Note that this
148 // transformation preserves SSA form.
149 void CfgNode::placePhiLoads() {
150 for (Inst &I : Phis) {
151 auto *Phi = llvm::dyn_cast<InstPhi>(&I);
152 Insts.insert(Insts.begin(), Phi->lower(Func));
156 // This does part 2 of Phi lowering. For each Phi instruction at each out-edge,
157 // create a corresponding assignment instruction, and add all the assignments
158 // near the end of this block. They need to be added before any branch
159 // instruction, and also if the block ends with a compare instruction followed
160 // by a branch instruction that we may want to fuse, it's better to insert the
161 // new assignments before the compare instruction. The
162 // tryOptimizedCmpxchgCmpBr() method assumes this ordering of instructions.
164 // Note that this transformation takes the Phi dest variables out of SSA form,
165 // as there may be assignments to the dest variable in multiple blocks.
166 void CfgNode::placePhiStores() {
167 // Find the insertion point.
168 InstList::iterator InsertionPoint = Insts.end();
169 // Every block must end in a terminator instruction, and therefore must have
170 // at least one instruction, so it's valid to decrement InsertionPoint (but
171 // assert just in case).
172 assert(InsertionPoint != Insts.begin());
174 // Confirm that InsertionPoint is a terminator instruction. Calling
175 // getTerminatorEdges() on a non-terminator instruction will cause an
176 // llvm_unreachable().
177 (void)InsertionPoint->getTerminatorEdges();
178 // SafeInsertionPoint is always immediately before the terminator
179 // instruction. If the block ends in a compare and conditional branch, it's
180 // better to place the Phi store before the compare so as not to interfere
181 // with compare/branch fusing. However, if the compare instruction's dest
182 // operand is the same as the new assignment statement's source operand, this
183 // can't be done due to data dependences, so we need to fall back to the
184 // SafeInsertionPoint. To illustrate:
186 // %97 = load i8* %96, align 1
187 // %98 = icmp ne i8 %97, 0
188 // br i1 %98, label %99, label %2132
190 // %100 = phi i8 [ %97, %95 ], [ %110, %108 ]
191 // %101 = phi i1 [ %98, %95 ], [ %111, %108 ]
192 // would be Phi-lowered as:
194 // %97 = load i8* %96, align 1
195 // %100_phi = %97 ; can be at InsertionPoint
196 // %98 = icmp ne i8 %97, 0
197 // %101_phi = %98 ; must be at SafeInsertionPoint
198 // br i1 %98, label %99, label %2132
203 // TODO(stichnot): It may be possible to bypass this whole SafeInsertionPoint
204 // mechanism. If a source basic block ends in a conditional branch:
207 // br i1 %foo, label %labelTrue, label %labelFalse
208 // and a branch target has a Phi involving the branch operand:
210 // %bar = phi i1 [ %foo, %labelSource ], ...
211 // then we actually know the constant i1 value of the Phi operand:
213 // %bar = phi i1 [ true, %labelSource ], ...
214 // It seems that this optimization should be done by clang or opt, but we
215 // could also do it here.
216 InstList::iterator SafeInsertionPoint = InsertionPoint;
217 // Keep track of the dest variable of a compare instruction, so that we
218 // insert the new instruction at the SafeInsertionPoint if the compare's dest
219 // matches the Phi-lowered assignment's source.
220 Variable *CmpInstDest = nullptr;
221 // If the current insertion point is at a conditional branch instruction, and
222 // the previous instruction is a compare instruction, then we move the
223 // insertion point before the compare instruction so as not to interfere with
224 // compare/branch fusing.
225 if (auto *Branch = llvm::dyn_cast<InstBr>(InsertionPoint)) {
226 if (!Branch->isUnconditional()) {
227 if (InsertionPoint != Insts.begin()) {
229 if (llvm::isa<InstIcmp>(InsertionPoint) ||
230 llvm::isa<InstFcmp>(InsertionPoint)) {
231 CmpInstDest = InsertionPoint->getDest();
239 // Consider every out-edge.
240 for (CfgNode *Succ : OutEdges) {
241 // Consider every Phi instruction at the out-edge.
242 for (Inst &I : Succ->Phis) {
243 auto *Phi = llvm::dyn_cast<InstPhi>(&I);
244 Operand *Operand = Phi->getOperandForTarget(this);
246 Variable *Dest = I.getDest();
248 auto *NewInst = InstAssign::create(Func, Dest, Operand);
249 if (CmpInstDest == Operand)
250 Insts.insert(SafeInsertionPoint, NewInst);
252 Insts.insert(InsertionPoint, NewInst);
257 // Deletes the phi instructions after the loads and stores are placed.
258 void CfgNode::deletePhis() {
263 // Splits the edge from Pred to this node by creating a new node and hooking up
264 // the in and out edges appropriately. (The EdgeIndex parameter is only used to
265 // make the new node's name unique when there are multiple edges between the
266 // same pair of nodes.) The new node's instruction list is initialized to the
267 // empty list, with no terminator instruction. There must not be multiple edges
268 // from Pred to this node so all Inst::getTerminatorEdges implementations must
269 // not contain duplicates.
270 CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) {
271 CfgNode *NewNode = Func->makeNode();
272 // Depth is the minimum as it works if both are the same, but if one is
273 // outside the loop and the other is inside, the new node should be placed
274 // outside and not be executed multiple times within the loop.
275 NewNode->setLoopNestDepth(
276 std::min(getLoopNestDepth(), Pred->getLoopNestDepth()));
277 if (BuildDefs::dump())
278 NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" +
279 std::to_string(EdgeIndex));
280 // The new node is added to the end of the node list, and will later need to
281 // be sorted into a reasonable topological order.
282 NewNode->setNeedsPlacement(true);
283 // Repoint Pred's out-edge.
285 for (CfgNode *&I : Pred->OutEdges) {
288 NewNode->InEdges.push_back(Pred);
295 // Repoint this node's in-edge.
297 for (CfgNode *&I : InEdges) {
300 NewNode->OutEdges.push_back(this);
307 // Repoint all suitable branch instructions' target and return.
309 for (Inst &I : Pred->getInsts())
310 if (!I.isDeleted() && I.repointEdges(this, NewNode))
319 // Helpers for advancedPhiLowering().
323 PhiDesc(const PhiDesc &) = delete;
324 PhiDesc &operator=(const PhiDesc &) = delete;
327 PhiDesc(InstPhi *Phi, Variable *Dest) : Phi(Phi), Dest(Dest) {}
328 PhiDesc(PhiDesc &&) = default;
329 InstPhi *Phi = nullptr;
330 Variable *Dest = nullptr;
331 Operand *Src = nullptr;
332 bool Processed = false;
333 size_t NumPred = 0; // number of entries whose Src is this Dest
334 int32_t Weight = 0; // preference for topological order
336 using PhiDescList = llvm::SmallVector<PhiDesc, 32>;
338 // Always pick NumPred=0 over NumPred>0.
339 constexpr int32_t WeightNoPreds = 8;
340 // Prefer Src as a register because the register might free up.
341 constexpr int32_t WeightSrcIsReg = 4;
342 // Prefer Dest not as a register because the register stays free longer.
343 constexpr int32_t WeightDestNotReg = 2;
344 // Prefer NumPred=1 over NumPred>1. This is used as a tiebreaker when a
345 // dependency cycle must be broken so that hopefully only one temporary
346 // assignment has to be added to break the cycle.
347 constexpr int32_t WeightOnePred = 1;
349 bool sameVarOrReg(TargetLowering *Target, const Variable *Var1,
350 const Operand *Opnd) {
353 const auto *Var2 = llvm::dyn_cast<Variable>(Opnd);
357 // If either operand lacks a register, they cannot be the same.
363 const auto RegNum1 = Var1->getRegNum();
364 const auto RegNum2 = Var2->getRegNum();
365 // Quick common-case check.
366 if (RegNum1 == RegNum2)
369 assert(Target->getAliasesForRegister(RegNum1)[RegNum2] ==
370 Target->getAliasesForRegister(RegNum2)[RegNum1]);
371 return Target->getAliasesForRegister(RegNum1)[RegNum2];
374 // Update NumPred for all Phi assignments using Var as their Dest variable.
375 // Also update Weight if NumPred dropped from 2 to 1, or 1 to 0.
376 void updatePreds(PhiDescList &Desc, TargetLowering *Target, Variable *Var) {
377 for (PhiDesc &Item : Desc) {
378 if (!Item.Processed && sameVarOrReg(Target, Var, Item.Dest)) {
380 if (Item.NumPred == 1) {
381 // If NumPred changed from 2 to 1, add in WeightOnePred.
382 Item.Weight += WeightOnePred;
383 } else if (Item.NumPred == 0) {
384 // If NumPred changed from 1 to 0, subtract WeightOnePred and add in
386 Item.Weight += (WeightNoPreds - WeightOnePred);
392 } // end of anonymous namespace
394 // This the "advanced" version of Phi lowering for a basic block, in contrast
395 // to the simple version that lowers through assignments involving temporaries.
397 // All Phi instructions in a basic block are conceptually executed in parallel.
398 // However, if we lower Phis early and commit to a sequential ordering, we may
399 // end up creating unnecessary interferences which lead to worse register
400 // allocation. Delaying Phi scheduling until after register allocation can help
401 // unless there are no free registers for shuffling registers or stack slots
402 // and spilling becomes necessary.
404 // The advanced Phi lowering starts by finding a topological sort of the Phi
405 // instructions, where "A=B" comes before "B=C" due to the anti-dependence on
406 // B. Preexisting register assignments are considered in the topological sort.
407 // If a topological sort is not possible due to a cycle, the cycle is broken by
408 // introducing a non-parallel temporary. For example, a cycle arising from a
409 // permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being
410 // equal, prefer to schedule assignments with register-allocated Src operands
411 // earlier, in case that register becomes free afterwards, and prefer to
412 // schedule assignments with register-allocated Dest variables later, to keep
413 // that register free for longer.
415 // Once the ordering is determined, the Cfg edge is split and the assignment
416 // list is lowered by the target lowering layer. Since the assignment lowering
417 // may create new infinite-weight temporaries, a follow-on register allocation
418 // pass will be needed. To prepare for this, liveness (including live range
419 // calculation) of the split nodes needs to be calculated, and liveness of the
420 // original node need to be updated to "undo" the effects of the phi
423 // The specific placement of the new node within the Cfg node list is deferred
424 // until later, including after empty node contraction.
426 // After phi assignments are lowered across all blocks, another register
427 // allocation pass is run, focusing only on pre-colored and infinite-weight
428 // variables, similar to Om1 register allocation (except without the need to
429 // specially compute these variables' live ranges, since they have already been
430 // precisely calculated). The register allocator in this mode needs the ability
431 // to forcibly spill and reload registers in case none are naturally available.
432 void CfgNode::advancedPhiLowering() {
433 if (getPhis().empty())
438 for (Inst &I : Phis) {
439 auto *Phi = llvm::dyn_cast<InstPhi>(&I);
440 if (!Phi->isDeleted()) {
441 Variable *Dest = Phi->getDest();
442 Desc.emplace_back(Phi, Dest);
443 // Undo the effect of the phi instruction on this node's live-in set by
444 // marking the phi dest variable as live on entry.
445 SizeT VarNum = Func->getLiveness()->getLiveIndex(Dest->getIndex());
446 assert(!Func->getLiveness()->getLiveIn(this)[VarNum]);
447 Func->getLiveness()->getLiveIn(this)[VarNum] = true;
454 TargetLowering *Target = Func->getTarget();
455 SizeT InEdgeIndex = 0;
456 for (CfgNode *Pred : InEdges) {
457 CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++);
458 SizeT Remaining = Desc.size();
460 // First pass computes Src and initializes NumPred.
461 for (PhiDesc &Item : Desc) {
462 Variable *Dest = Item.Dest;
463 Operand *Src = Item.Phi->getOperandForTarget(Pred);
465 Item.Processed = false;
467 // Cherry-pick any trivial assignments, so that they don't contribute to
468 // the running complexity of the topological sort.
469 if (sameVarOrReg(Target, Dest, Src)) {
470 Item.Processed = true;
473 // If Dest and Src are syntactically the same, don't bother adding
474 // the assignment, because in all respects it would be redundant, and
475 // if Dest/Src are on the stack, the target lowering may naively
476 // decide to lower it using a temporary register.
477 Split->appendInst(InstAssign::create(Func, Dest, Src));
480 // Second pass computes NumPred by comparing every pair of Phi instructions.
481 for (PhiDesc &Item : Desc) {
484 const Variable *Dest = Item.Dest;
485 for (PhiDesc &Item2 : Desc) {
488 // There shouldn't be two different Phis with the same Dest variable or
490 assert((&Item == &Item2) || !sameVarOrReg(Target, Dest, Item2.Dest));
491 if (sameVarOrReg(Target, Dest, Item2.Src))
496 // Another pass to compute initial Weight values.
497 for (PhiDesc &Item : Desc) {
501 if (Item.NumPred == 0)
502 Weight += WeightNoPreds;
503 if (Item.NumPred == 1)
504 Weight += WeightOnePred;
505 if (auto *Var = llvm::dyn_cast<Variable>(Item.Src))
507 Weight += WeightSrcIsReg;
508 if (!Item.Dest->hasReg())
509 Weight += WeightDestNotReg;
510 Item.Weight = Weight;
513 // Repeatedly choose and process the best candidate in the topological sort,
514 // until no candidates remain. This implementation is O(N^2) where N is the
515 // number of Phi instructions, but with a small constant factor compared to
516 // a likely implementation of O(N) topological sort.
517 for (; Remaining; --Remaining) {
518 int32_t BestWeight = -1;
519 PhiDesc *BestItem = nullptr;
520 // Find the best candidate.
521 for (PhiDesc &Item : Desc) {
524 const int32_t Weight = Item.Weight;
525 if (Weight > BestWeight) {
530 assert(BestWeight >= 0);
531 Variable *Dest = BestItem->Dest;
532 Operand *Src = BestItem->Src;
533 assert(!sameVarOrReg(Target, Dest, Src));
534 // Break a cycle by introducing a temporary.
535 while (BestItem->NumPred > 0) {
537 // If the target instruction "A=B" is part of a cycle, find the "X=A"
538 // assignment in the cycle because it will have to be rewritten as
540 for (PhiDesc &Item : Desc) {
543 Operand *OtherSrc = Item.Src;
544 if (Item.NumPred && sameVarOrReg(Target, Dest, OtherSrc)) {
545 SizeT VarNum = Func->getNumVariables();
546 Variable *Tmp = Func->makeVariable(OtherSrc->getType());
547 if (BuildDefs::dump())
548 Tmp->setName(Func, "__split_" + std::to_string(VarNum));
549 Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc));
551 updatePreds(Desc, Target, llvm::cast<Variable>(OtherSrc));
559 // Now that a cycle (if any) has been broken, create the actual
561 Split->appendInst(InstAssign::create(Func, Dest, Src));
562 if (auto *Var = llvm::dyn_cast<Variable>(Src))
563 updatePreds(Desc, Target, Var);
564 BestItem->Processed = true;
566 Split->appendInst(InstBr::create(Func, this));
569 Func->getVMetadata()->addNode(Split);
570 // Validate to be safe. All items should be marked as processed, and have
572 if (BuildDefs::asserts()) {
573 for (PhiDesc &Item : Desc) {
575 assert(Item.Processed);
576 assert(Item.NumPred == 0);
582 // Does address mode optimization. Pass each instruction to the TargetLowering
583 // object. If it returns a new instruction (representing the optimized address
584 // mode), then insert the new instruction and delete the old.
585 void CfgNode::doAddressOpt() {
586 TargetLowering *Target = Func->getTarget();
587 LoweringContext &Context = Target->getContext();
589 while (!Context.atEnd()) {
590 Target->doAddressOpt();
594 void CfgNode::doNopInsertion(RandomNumberGenerator &RNG) {
595 TargetLowering *Target = Func->getTarget();
596 LoweringContext &Context = Target->getContext();
598 Context.setInsertPoint(Context.getCur());
599 // Do not insert nop in bundle locked instructions.
600 bool PauseNopInsertion = false;
601 while (!Context.atEnd()) {
602 if (llvm::isa<InstBundleLock>(Context.getCur())) {
603 PauseNopInsertion = true;
604 } else if (llvm::isa<InstBundleUnlock>(Context.getCur())) {
605 PauseNopInsertion = false;
607 if (!PauseNopInsertion)
608 Target->doNopInsertion(RNG);
609 // Ensure Cur=Next, so that the nops are inserted before the current
610 // instruction rather than after.
611 Context.advanceCur();
612 Context.advanceNext();
616 // Drives the target lowering. Passes the current instruction and the next
617 // non-deleted instruction for target lowering.
618 void CfgNode::genCode() {
619 TargetLowering *Target = Func->getTarget();
620 LoweringContext &Context = Target->getContext();
621 // Lower the regular instructions.
623 Target->initNodeForLowering(this);
624 while (!Context.atEnd()) {
625 InstList::iterator Orig = Context.getCur();
626 if (llvm::isa<InstRet>(*Orig))
629 // Ensure target lowering actually moved the cursor.
630 assert(Context.getCur() != Orig);
632 Context.availabilityReset();
633 // Do preliminary lowering of the Phi instructions.
634 Target->prelowerPhis();
637 void CfgNode::livenessLightweight() {
638 SizeT NumVars = Func->getNumVariables();
639 LivenessBV Live(NumVars);
640 // Process regular instructions in reverse order.
641 for (Inst &I : reverse_range(Insts)) {
644 I.livenessLightweight(Func, Live);
646 for (Inst &I : Phis) {
649 I.livenessLightweight(Func, Live);
653 // Performs liveness analysis on the block. Returns true if the incoming
654 // liveness changed from before, false if it stayed the same. (If it changes,
655 // the node's predecessors need to be processed again.)
656 bool CfgNode::liveness(Liveness *Liveness) {
657 const SizeT NumVars = Liveness->getNumVarsInNode(this);
658 const SizeT NumGlobalVars = Liveness->getNumGlobalVars();
659 LivenessBV &Live = Liveness->getScratchBV();
662 LiveBeginEndMap *LiveBegin = nullptr;
663 LiveBeginEndMap *LiveEnd = nullptr;
664 // Mark the beginning and ending of each variable's live range with the
665 // sentinel instruction number 0.
666 if (Liveness->getMode() == Liveness_Intervals) {
667 LiveBegin = Liveness->getLiveBegin(this);
668 LiveEnd = Liveness->getLiveEnd(this);
671 // Guess that the number of live ranges beginning is roughly the number of
672 // instructions, and same for live ranges ending.
673 LiveBegin->reserve(getInstCountEstimate());
674 LiveEnd->reserve(getInstCountEstimate());
677 // Initialize Live to be the union of all successors' LiveIn.
678 for (CfgNode *Succ : OutEdges) {
679 const LivenessBV &LiveIn = Liveness->getLiveIn(Succ);
680 assert(LiveIn.empty() || LiveIn.size() == NumGlobalVars);
682 // Mark corresponding argument of phis in successor as live.
683 for (Inst &I : Succ->Phis) {
686 auto *Phi = llvm::cast<InstPhi>(&I);
687 Phi->livenessPhiOperand(Live, this, Liveness);
690 assert(Live.empty() || Live.size() == NumGlobalVars);
691 Liveness->getLiveOut(this) = Live;
693 // Expand Live so it can hold locals in addition to globals.
694 Live.resize(NumVars);
695 // Process regular instructions in reverse order.
696 for (Inst &I : reverse_range(Insts)) {
699 I.liveness(I.getNumber(), Live, Liveness, LiveBegin, LiveEnd);
701 // Process phis in forward order so that we can override the instruction
702 // number to be that of the earliest phi instruction in the block.
703 SizeT NumNonDeadPhis = 0;
704 InstNumberT FirstPhiNumber = Inst::NumberSentinel;
705 for (Inst &I : Phis) {
708 if (FirstPhiNumber == Inst::NumberSentinel)
709 FirstPhiNumber = I.getNumber();
710 if (I.liveness(FirstPhiNumber, Live, Liveness, LiveBegin, LiveEnd))
714 // When using the sparse representation, after traversing the instructions in
715 // the block, the Live bitvector should only contain set bits for global
716 // variables upon block entry. We validate this by testing the upper bits of
717 // the Live bitvector.
718 if (Live.find_next(NumGlobalVars) != -1) {
719 if (BuildDefs::dump()) {
720 // This is a fatal liveness consistency error. Print some diagnostics and
722 Ostream &Str = Func->getContext()->getStrDump();
723 Func->resetCurrentNode();
724 Str << "Invalid Live =";
725 for (SizeT i = NumGlobalVars; i < Live.size(); ++i) {
728 Liveness->getVariable(i, this)->dump(Func);
733 llvm::report_fatal_error("Fatal inconsistency in liveness analysis");
735 // Now truncate Live to prevent LiveIn from growing.
736 Live.resize(NumGlobalVars);
738 bool Changed = false;
739 LivenessBV &LiveIn = Liveness->getLiveIn(this);
740 assert(LiveIn.empty() || LiveIn.size() == NumGlobalVars);
741 // Add in current LiveIn
743 // Check result, set LiveIn=Live
744 SizeT &PrevNumNonDeadPhis = Liveness->getNumNonDeadPhis(this);
745 bool LiveInChanged = (Live != LiveIn);
746 Changed = (NumNonDeadPhis != PrevNumNonDeadPhis || LiveInChanged);
749 PrevNumNonDeadPhis = NumNonDeadPhis;
753 // Validate the integrity of the live ranges in this block. If there are any
754 // errors, it prints details and returns false. On success, it returns true.
755 bool CfgNode::livenessValidateIntervals(Liveness *Liveness) const {
756 if (!BuildDefs::asserts())
759 // Verify there are no duplicates.
761 [](const LiveBeginEndMapEntry &A, const LiveBeginEndMapEntry &B) {
762 return A.first == B.first;
764 LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this);
765 LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this);
766 if (std::adjacent_find(MapBegin.begin(), MapBegin.end(), ComparePair) ==
768 std::adjacent_find(MapEnd.begin(), MapEnd.end(), ComparePair) ==
772 // There is definitely a liveness error. All paths from here return false.
773 if (!BuildDefs::dump())
776 // Print all the errors.
777 if (BuildDefs::dump()) {
778 GlobalContext *Ctx = Func->getContext();
779 OstreamLocker L(Ctx);
780 Ostream &Str = Ctx->getStrDump();
781 if (Func->isVerbose()) {
782 Str << "Live range errors in the following block:\n";
785 for (auto Start = MapBegin.begin();
786 (Start = std::adjacent_find(Start, MapBegin.end(), ComparePair)) !=
789 auto Next = Start + 1;
790 Str << "Duplicate LR begin, block " << getName() << ", instructions "
791 << Start->second << " & " << Next->second << ", variable "
792 << Liveness->getVariable(Start->first, this)->getName() << "\n";
794 for (auto Start = MapEnd.begin();
795 (Start = std::adjacent_find(Start, MapEnd.end(), ComparePair)) !=
798 auto Next = Start + 1;
799 Str << "Duplicate LR end, block " << getName() << ", instructions "
800 << Start->second << " & " << Next->second << ", variable "
801 << Liveness->getVariable(Start->first, this)->getName() << "\n";
808 // Once basic liveness is complete, compute actual live ranges. It is assumed
809 // that within a single basic block, a live range begins at most once and ends
810 // at most once. This is certainly true for pure SSA form. It is also true once
811 // phis are lowered, since each assignment to the phi-based temporary is in a
812 // different basic block, and there is a single read that ends the live in the
813 // basic block that contained the actual phi instruction.
814 void CfgNode::livenessAddIntervals(Liveness *Liveness, InstNumberT FirstInstNum,
815 InstNumberT LastInstNum) {
816 TimerMarker T1(TimerStack::TT_liveRange, Func);
818 const SizeT NumVars = Liveness->getNumVarsInNode(this);
819 const LivenessBV &LiveIn = Liveness->getLiveIn(this);
820 const LivenessBV &LiveOut = Liveness->getLiveOut(this);
821 LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this);
822 LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this);
823 std::sort(MapBegin.begin(), MapBegin.end());
824 std::sort(MapEnd.begin(), MapEnd.end());
826 if (!livenessValidateIntervals(Liveness)) {
827 llvm::report_fatal_error("livenessAddIntervals: Liveness error");
831 LivenessBV &LiveInAndOut = Liveness->getScratchBV();
832 LiveInAndOut = LiveIn;
833 LiveInAndOut &= LiveOut;
835 // Iterate in parallel across the sorted MapBegin[] and MapEnd[].
836 auto IBB = MapBegin.begin(), IEB = MapEnd.begin();
837 auto IBE = MapBegin.end(), IEE = MapEnd.end();
838 while (IBB != IBE || IEB != IEE) {
839 SizeT i1 = IBB == IBE ? NumVars : IBB->first;
840 SizeT i2 = IEB == IEE ? NumVars : IEB->first;
841 SizeT i = std::min(i1, i2);
842 // i1 is the Variable number of the next MapBegin entry, and i2 is the
843 // Variable number of the next MapEnd entry. If i1==i2, then the Variable's
844 // live range begins and ends in this block. If i1<i2, then i1's live range
845 // begins at instruction IBB->second and extends through the end of the
846 // block. If i1>i2, then i2's live range begins at the first instruction of
847 // the block and ends at IEB->second. In any case, we choose the lesser of
848 // i1 and i2 and proceed accordingly.
849 InstNumberT LB = i == i1 ? IBB->second : FirstInstNum;
850 InstNumberT LE = i == i2 ? IEB->second : LastInstNum + 1;
852 Variable *Var = Liveness->getVariable(i, this);
854 Var->addLiveRange(FirstInstNum, LE);
855 Var->addLiveRange(LB, LastInstNum + 1);
856 // Assert that Var is a global variable by checking that its liveness
857 // index is less than the number of globals. This ensures that the
858 // LiveInAndOut[] access is valid.
859 assert(i < Liveness->getNumGlobalVars());
860 LiveInAndOut[i] = false;
862 Var->addLiveRange(LB, LE);
869 // Process the variables that are live across the entire block.
870 for (int i = LiveInAndOut.find_first(); i != -1;
871 i = LiveInAndOut.find_next(i)) {
872 Variable *Var = Liveness->getVariable(i, this);
873 if (Liveness->getRangeMask(Var->getIndex()))
874 Var->addLiveRange(FirstInstNum, LastInstNum + 1);
878 // If this node contains only deleted instructions, and ends in an
879 // unconditional branch, contract the node by repointing all its in-edges to
881 void CfgNode::contractIfEmpty() {
884 Inst *Branch = nullptr;
885 for (Inst &I : Insts) {
888 if (I.isUnconditionalBranch())
890 else if (!I.isRedundantAssign())
893 // Make sure there is actually a successor to repoint in-edges to.
894 if (OutEdges.empty())
896 assert(hasSingleOutEdge());
897 // Don't try to delete a self-loop.
898 if (OutEdges[0] == this)
900 // Make sure the node actually contains (ends with) an unconditional branch.
901 if (Branch == nullptr)
904 Branch->setDeleted();
905 CfgNode *Successor = OutEdges.front();
906 // Repoint all this node's in-edges to this node's successor, unless this
907 // node's successor is actually itself (in which case the statement
908 // "OutEdges.front()->InEdges.push_back(Pred)" could invalidate the iterator
909 // over this->InEdges).
910 if (Successor != this) {
911 for (CfgNode *Pred : InEdges) {
912 for (CfgNode *&I : Pred->OutEdges) {
915 Successor->InEdges.push_back(Pred);
918 for (Inst &I : Pred->getInsts()) {
920 I.repointEdges(this, Successor);
924 // Remove the in-edge to the successor to allow node reordering to make
925 // better decisions. For example it's more helpful to place a node after a
926 // reachable predecessor than an unreachable one (like the one we just
928 Successor->InEdges.erase(
929 std::find(Successor->InEdges.begin(), Successor->InEdges.end(), this));
934 void CfgNode::doBranchOpt(const CfgNode *NextNode) {
935 TargetLowering *Target = Func->getTarget();
936 // Find the first opportunity for branch optimization (which will be the last
937 // instruction in the block) and stop. This is sufficient unless there is
938 // some target lowering where we have the possibility of multiple
939 // optimizations per block. Take care with switch lowering as there are
940 // multiple unconditional branches and only the last can be deleted.
941 for (Inst &I : reverse_range(Insts)) {
942 if (!I.isDeleted()) {
943 Target->doBranchOpt(&I, NextNode);
949 // ======================== Dump routines ======================== //
953 // Helper functions for emit().
955 void emitRegisterUsage(Ostream &Str, const Cfg *Func, const CfgNode *Node,
956 bool IsLiveIn, CfgVector<SizeT> &LiveRegCount) {
957 if (!BuildDefs::dump())
959 Liveness *Liveness = Func->getLiveness();
960 const LivenessBV *Live;
961 const auto StackReg = Func->getTarget()->getStackReg();
962 const auto FrameOrStackReg = Func->getTarget()->getFrameOrStackReg();
964 Live = &Liveness->getLiveIn(Node);
965 Str << "\t\t\t\t/* LiveIn=";
967 Live = &Liveness->getLiveOut(Node);
968 Str << "\t\t\t\t/* LiveOut=";
970 if (!Live->empty()) {
971 CfgVector<Variable *> LiveRegs;
972 for (SizeT i = 0; i < Live->size(); ++i) {
975 Variable *Var = Liveness->getVariable(i, Node);
978 const auto RegNum = Var->getRegNum();
979 if (RegNum == StackReg || RegNum == FrameOrStackReg)
982 ++LiveRegCount[RegNum];
983 LiveRegs.push_back(Var);
985 // Sort the variables by regnum so they are always printed in a familiar
987 std::sort(LiveRegs.begin(), LiveRegs.end(),
988 [](const Variable *V1, const Variable *V2) {
989 return unsigned(V1->getRegNum()) < unsigned(V2->getRegNum());
992 for (Variable *Var : LiveRegs) {
1002 /// Returns true if some text was emitted - in which case the caller definitely
1003 /// needs to emit a newline character.
1004 bool emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr,
1005 CfgVector<SizeT> &LiveRegCount) {
1006 bool Printed = false;
1007 if (!BuildDefs::dump())
1009 Variable *Dest = Instr->getDest();
1010 // Normally we increment the live count for the dest register. But we
1011 // shouldn't if the instruction's IsDestRedefined flag is set, because this
1012 // means that the target lowering created this instruction as a non-SSA
1013 // assignment; i.e., a different, previous instruction started the dest
1014 // variable's live range.
1015 if (!Instr->isDestRedefined() && Dest && Dest->hasReg())
1016 ++LiveRegCount[Dest->getRegNum()];
1017 FOREACH_VAR_IN_INST(Var, *Instr) {
1018 bool ShouldReport = Instr->isLastUse(Var);
1019 if (ShouldReport && Var->hasReg()) {
1020 // Don't report end of live range until the live count reaches 0.
1021 SizeT NewCount = --LiveRegCount[Var->getRegNum()];
1023 ShouldReport = false;
1029 Str << " \t/* END=";
1039 void updateStats(Cfg *Func, const Inst *I) {
1040 if (!BuildDefs::dump())
1042 // Update emitted instruction count, plus fill/spill count for Variable
1043 // operands without a physical register.
1044 if (uint32_t Count = I->getEmitInstCount()) {
1045 Func->getContext()->statsUpdateEmitted(Count);
1046 if (Variable *Dest = I->getDest()) {
1047 if (!Dest->hasReg())
1048 Func->getContext()->statsUpdateFills();
1050 for (SizeT S = 0; S < I->getSrcSize(); ++S) {
1051 if (auto *Src = llvm::dyn_cast<Variable>(I->getSrc(S))) {
1053 Func->getContext()->statsUpdateSpills();
1059 } // end of anonymous namespace
1061 void CfgNode::emit(Cfg *Func) const {
1062 if (!BuildDefs::dump())
1064 Func->setCurrentNode(this);
1065 Ostream &Str = Func->getContext()->getStrEmit();
1066 Liveness *Liveness = Func->getLiveness();
1067 const bool DecorateAsm = Liveness && getFlags().getDecorateAsm();
1068 Str << getAsmName() << ":\n";
1069 // LiveRegCount keeps track of the number of currently live variables that
1070 // each register is assigned to. Normally that would be only 0 or 1, but the
1071 // register allocator's AllowOverlap inference allows it to be greater than 1
1072 // for short periods.
1073 CfgVector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters());
1075 constexpr bool IsLiveIn = true;
1076 emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
1077 if (getInEdges().size()) {
1078 Str << "\t\t\t\t/* preds=";
1080 for (CfgNode *I : getInEdges()) {
1084 Str << "$" << I->getName();
1088 if (getLoopNestDepth()) {
1089 Str << "\t\t\t\t/* loop depth=" << getLoopNestDepth() << " */\n";
1093 for (const Inst &I : Phis) {
1096 // Emitting a Phi instruction should cause an error.
1099 for (const Inst &I : Insts) {
1102 if (I.isRedundantAssign()) {
1103 // Usually, redundant assignments end the live range of the src variable
1104 // and begin the live range of the dest variable, with no net effect on
1105 // the liveness of their register. However, if the register allocator
1106 // infers the AllowOverlap condition, then this may be a redundant
1107 // assignment that does not end the src variable's live range, in which
1108 // case the active variable count for that register needs to be bumped.
1109 // That normally would have happened as part of emitLiveRangesEnded(),
1110 // but that isn't called for redundant assignments.
1111 Variable *Dest = I.getDest();
1112 if (DecorateAsm && Dest->hasReg()) {
1113 ++LiveRegCount[Dest->getRegNum()];
1114 if (I.isLastUse(I.getSrc(0)))
1115 --LiveRegCount[llvm::cast<Variable>(I.getSrc(0))->getRegNum()];
1120 bool Printed = false;
1122 Printed = emitLiveRangesEnded(Str, Func, &I, LiveRegCount);
1123 if (Printed || llvm::isa<InstTarget>(&I))
1125 updateStats(Func, &I);
1128 constexpr bool IsLiveIn = false;
1129 emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
1133 // Helper class for emitIAS().
1135 class BundleEmitHelper {
1136 BundleEmitHelper() = delete;
1137 BundleEmitHelper(const BundleEmitHelper &) = delete;
1138 BundleEmitHelper &operator=(const BundleEmitHelper &) = delete;
1141 BundleEmitHelper(Assembler *Asm, const InstList &Insts)
1142 : Asm(Asm), End(Insts.end()), BundleLockStart(End),
1143 BundleSize(1 << Asm->getBundleAlignLog2Bytes()),
1144 BundleMaskLo(BundleSize - 1), BundleMaskHi(~BundleMaskLo) {}
1145 // Check whether we're currently within a bundle_lock region.
1146 bool isInBundleLockRegion() const { return BundleLockStart != End; }
1147 // Check whether the current bundle_lock region has the align_to_end option.
1148 bool isAlignToEnd() const {
1149 assert(isInBundleLockRegion());
1150 return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() ==
1151 InstBundleLock::Opt_AlignToEnd;
1153 bool isPadToEnd() const {
1154 assert(isInBundleLockRegion());
1155 return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() ==
1156 InstBundleLock::Opt_PadToEnd;
1158 // Check whether the entire bundle_lock region falls within the same bundle.
1159 bool isSameBundle() const {
1160 assert(isInBundleLockRegion());
1161 return SizeSnapshotPre == SizeSnapshotPost ||
1162 (SizeSnapshotPre & BundleMaskHi) ==
1163 ((SizeSnapshotPost - 1) & BundleMaskHi);
1165 // Get the bundle alignment of the first instruction of the bundle_lock
1167 intptr_t getPreAlignment() const {
1168 assert(isInBundleLockRegion());
1169 return SizeSnapshotPre & BundleMaskLo;
1171 // Get the bundle alignment of the first instruction past the bundle_lock
1173 intptr_t getPostAlignment() const {
1174 assert(isInBundleLockRegion());
1175 return SizeSnapshotPost & BundleMaskLo;
1177 // Get the iterator pointing to the bundle_lock instruction, e.g. to roll
1178 // back the instruction iteration to that point.
1179 InstList::const_iterator getBundleLockStart() const {
1180 assert(isInBundleLockRegion());
1181 return BundleLockStart;
1183 // Set up bookkeeping when the bundle_lock instruction is first processed.
1184 void enterBundleLock(InstList::const_iterator I) {
1185 assert(!isInBundleLockRegion());
1186 BundleLockStart = I;
1187 SizeSnapshotPre = Asm->getBufferSize();
1188 Asm->setPreliminary(true);
1189 assert(isInBundleLockRegion());
1191 // Update bookkeeping when the bundle_unlock instruction is processed.
1192 void enterBundleUnlock() {
1193 assert(isInBundleLockRegion());
1194 SizeSnapshotPost = Asm->getBufferSize();
1196 // Update bookkeeping when we are completely finished with the bundle_lock
1198 void leaveBundleLockRegion() { BundleLockStart = End; }
1199 // Check whether the instruction sequence fits within the current bundle, and
1200 // if not, add nop padding to the end of the current bundle.
1201 void padToNextBundle() {
1202 assert(isInBundleLockRegion());
1203 if (!isSameBundle()) {
1204 intptr_t PadToNextBundle = BundleSize - getPreAlignment();
1205 Asm->padWithNop(PadToNextBundle);
1206 SizeSnapshotPre += PadToNextBundle;
1207 SizeSnapshotPost += PadToNextBundle;
1208 assert((Asm->getBufferSize() & BundleMaskLo) == 0);
1209 assert(Asm->getBufferSize() == SizeSnapshotPre);
1212 // If align_to_end is specified, add padding such that the instruction
1213 // sequences ends precisely at a bundle boundary.
1214 void padForAlignToEnd() {
1215 assert(isInBundleLockRegion());
1216 if (isAlignToEnd()) {
1217 if (intptr_t Offset = getPostAlignment()) {
1218 Asm->padWithNop(BundleSize - Offset);
1219 SizeSnapshotPre = Asm->getBufferSize();
1223 // If pad_to_end is specified, add padding such that the first instruction
1224 // after the instruction sequence starts at a bundle boundary.
1225 void padForPadToEnd() {
1226 assert(isInBundleLockRegion());
1228 if (intptr_t Offset = getPostAlignment()) {
1229 Asm->padWithNop(BundleSize - Offset);
1230 SizeSnapshotPre = Asm->getBufferSize();
1233 } // Update bookkeeping when rolling back for the second pass.
1235 assert(isInBundleLockRegion());
1236 Asm->setBufferSize(SizeSnapshotPre);
1237 Asm->setPreliminary(false);
1241 Assembler *const Asm;
1242 // End is a sentinel value such that BundleLockStart==End implies that we are
1243 // not in a bundle_lock region.
1244 const InstList::const_iterator End;
1245 InstList::const_iterator BundleLockStart;
1246 const intptr_t BundleSize;
1247 // Masking with BundleMaskLo identifies an address's bundle offset.
1248 const intptr_t BundleMaskLo;
1249 // Masking with BundleMaskHi identifies an address's bundle.
1250 const intptr_t BundleMaskHi;
1251 intptr_t SizeSnapshotPre = 0;
1252 intptr_t SizeSnapshotPost = 0;
1255 } // end of anonymous namespace
1257 void CfgNode::emitIAS(Cfg *Func) const {
1258 Func->setCurrentNode(this);
1259 Assembler *Asm = Func->getAssembler<>();
1260 // TODO(stichnot): When sandboxing, defer binding the node label until just
1261 // before the first instruction is emitted, to reduce the chance that a
1262 // padding nop is a branch target.
1263 Asm->bindCfgNodeLabel(this);
1264 for (const Inst &I : Phis) {
1267 // Emitting a Phi instruction should cause an error.
1271 // Do the simple emission if not sandboxed.
1272 if (!getFlags().getUseSandboxing()) {
1273 for (const Inst &I : Insts) {
1274 if (!I.isDeleted() && !I.isRedundantAssign()) {
1276 updateStats(Func, &I);
1282 // The remainder of the function handles emission with sandboxing. There are
1283 // explicit bundle_lock regions delimited by bundle_lock and bundle_unlock
1284 // instructions. All other instructions are treated as an implicit
1285 // one-instruction bundle_lock region. Emission is done twice for each
1286 // bundle_lock region. The first pass is a preliminary pass, after which we
1287 // can figure out what nop padding is needed, then roll back, and make the
1290 // Ideally, the first pass would be speculative and the second pass would
1291 // only be done if nop padding were needed, but the structure of the
1292 // integrated assembler makes it hard to roll back the state of label
1293 // bindings, label links, and relocation fixups. Instead, the first pass just
1294 // disables all mutation of that state.
1296 BundleEmitHelper Helper(Asm, Insts);
1297 InstList::const_iterator End = Insts.end();
1298 // Retrying indicates that we had to roll back to the bundle_lock instruction
1299 // to apply padding before the bundle_lock sequence.
1300 bool Retrying = false;
1301 for (InstList::const_iterator I = Insts.begin(); I != End; ++I) {
1302 if (I->isDeleted() || I->isRedundantAssign())
1305 if (llvm::isa<InstBundleLock>(I)) {
1306 // Set up the initial bundle_lock state. This should not happen while
1307 // retrying, because the retry rolls back to the instruction following
1308 // the bundle_lock instruction.
1310 Helper.enterBundleLock(I);
1314 if (llvm::isa<InstBundleUnlock>(I)) {
1315 Helper.enterBundleUnlock();
1317 // Make sure all instructions are in the same bundle.
1318 assert(Helper.isSameBundle());
1319 // If align_to_end is specified, make sure the next instruction begins
1321 assert(!Helper.isAlignToEnd() || Helper.getPostAlignment() == 0);
1322 Helper.padForPadToEnd();
1323 Helper.leaveBundleLockRegion();
1326 // This is the first pass, so roll back for the retry pass.
1328 // Pad to the next bundle if the instruction sequence crossed a bundle
1330 Helper.padToNextBundle();
1331 // Insert additional padding to make AlignToEnd work.
1332 Helper.padForAlignToEnd();
1333 // Prepare for the retry pass after padding is done.
1335 I = Helper.getBundleLockStart();
1340 // I points to a non bundle_lock/bundle_unlock instruction.
1341 if (Helper.isInBundleLockRegion()) {
1343 // Only update stats during the final pass.
1345 updateStats(Func, iteratorToInst(I));
1347 // Treat it as though there were an implicit bundle_lock and
1348 // bundle_unlock wrapping the instruction.
1349 Helper.enterBundleLock(I);
1351 Helper.enterBundleUnlock();
1353 Helper.padToNextBundle();
1355 updateStats(Func, iteratorToInst(I));
1356 Helper.leaveBundleLockRegion();
1360 // Don't allow bundle locking across basic blocks, to keep the backtracking
1361 // mechanism simple.
1362 assert(!Helper.isInBundleLockRegion());
1366 void CfgNode::dump(Cfg *Func) const {
1367 if (!BuildDefs::dump())
1369 Func->setCurrentNode(this);
1370 Ostream &Str = Func->getContext()->getStrDump();
1371 Liveness *Liveness = Func->getLiveness();
1372 if (Func->isVerbose(IceV_Instructions) || Func->isVerbose(IceV_Loop))
1373 Str << getName() << ":\n";
1374 // Dump the loop nest depth
1375 if (Func->isVerbose(IceV_Loop))
1376 Str << " // LoopNestDepth = " << getLoopNestDepth() << "\n";
1377 // Dump list of predecessor nodes.
1378 if (Func->isVerbose(IceV_Preds) && !InEdges.empty()) {
1379 Str << " // preds = ";
1381 for (CfgNode *I : InEdges) {
1385 Str << "%" << I->getName();
1389 // Dump the live-in variables.
1390 if (Func->isVerbose(IceV_Liveness)) {
1391 if (Liveness != nullptr && !Liveness->getLiveIn(this).empty()) {
1392 const LivenessBV &LiveIn = Liveness->getLiveIn(this);
1393 Str << " // LiveIn:";
1394 for (SizeT i = 0; i < LiveIn.size(); ++i) {
1396 Variable *Var = Liveness->getVariable(i, this);
1397 Str << " %" << Var->getName();
1398 if (Func->isVerbose(IceV_RegOrigins) && Var->hasReg()) {
1400 << Func->getTarget()->getRegName(Var->getRegNum(),
1408 // Dump each instruction.
1409 if (Func->isVerbose(IceV_Instructions)) {
1410 for (const Inst &I : Phis)
1411 I.dumpDecorated(Func);
1412 for (const Inst &I : Insts)
1413 I.dumpDecorated(Func);
1415 // Dump the live-out variables.
1416 if (Func->isVerbose(IceV_Liveness)) {
1417 if (Liveness != nullptr && !Liveness->getLiveOut(this).empty()) {
1418 const LivenessBV &LiveOut = Liveness->getLiveOut(this);
1419 Str << " // LiveOut:";
1420 for (SizeT i = 0; i < LiveOut.size(); ++i) {
1422 Variable *Var = Liveness->getVariable(i, this);
1423 Str << " %" << Var->getName();
1424 if (Func->isVerbose(IceV_RegOrigins) && Var->hasReg()) {
1426 << Func->getTarget()->getRegName(Var->getRegNum(),
1434 // Dump list of successor nodes.
1435 if (Func->isVerbose(IceV_Succs)) {
1436 Str << " // succs = ";
1438 for (CfgNode *I : OutEdges) {
1442 Str << "%" << I->getName();
1448 void CfgNode::profileExecutionCount(VariableDeclaration *Var) {
1449 GlobalContext *Ctx = Func->getContext();
1450 GlobalString RMW_I64 = Ctx->getGlobalString("llvm.nacl.atomic.rmw.i64");
1452 bool BadIntrinsic = false;
1453 const Intrinsics::FullIntrinsicInfo *Info =
1454 Ctx->getIntrinsicsInfo().find(RMW_I64, BadIntrinsic);
1455 assert(!BadIntrinsic);
1456 assert(Info != nullptr);
1458 Operand *RMWI64Name = Ctx->getConstantExternSym(RMW_I64);
1459 constexpr RelocOffsetT Offset = 0;
1460 Constant *Counter = Ctx->getConstantSym(Offset, Var->getName());
1461 Constant *AtomicRMWOp = Ctx->getConstantInt32(Intrinsics::AtomicAdd);
1462 Constant *One = Ctx->getConstantInt64(1);
1463 Constant *OrderAcquireRelease =
1464 Ctx->getConstantInt32(Intrinsics::MemoryOrderAcquireRelease);
1466 auto *Instr = InstIntrinsicCall::create(
1467 Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info);
1468 Instr->addArg(AtomicRMWOp);
1469 Instr->addArg(Counter);
1471 Instr->addArg(OrderAcquireRelease);
1472 Insts.push_front(Instr);
1475 } // end of namespace Ice