1 //===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===//
3 // The Subzero Code Generator
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief Implements the CfgNode class, including the complexities of
12 /// instruction insertion and in-edge calculation.
14 //===----------------------------------------------------------------------===//
16 #include "IceCfgNode.h"
18 #include "IceAssembler.h"
20 #include "IceGlobalInits.h"
22 #include "IceInstVarIter.h"
23 #include "IceLiveness.h"
24 #include "IceOperand.h"
25 #include "IceTargetLowering.h"
29 CfgNode::CfgNode(Cfg *Func, SizeT LabelNumber)
30 : Func(Func), Number(LabelNumber), LabelNumber(LabelNumber) {}
32 // Returns the name the node was created with. If no name was given, it
33 // synthesizes a (hopefully) unique name.
34 IceString CfgNode::getName() const {
36 return Func->getIdentifierName(NameIndex);
37 return "__" + std::to_string(LabelNumber);
40 // Adds an instruction to either the Phi list or the regular instruction list.
41 // Validates that all Phis are added before all regular instructions.
42 void CfgNode::appendInst(Inst *Inst) {
44 if (auto *Phi = llvm::dyn_cast<InstPhi>(Inst)) {
46 Func->setError("Phi instruction added to the middle of a block");
51 Insts.push_back(Inst);
55 // Renumbers the non-deleted instructions in the node. This needs to be done in
56 // preparation for live range analysis. The instruction numbers in a block must
57 // be monotonically increasing. The range of instruction numbers in a block,
58 // from lowest to highest, must not overlap with the range of any other block.
59 void CfgNode::renumberInstructions() {
60 InstNumberT FirstNumber = Func->getNextInstNumber();
65 InstCountEstimate = Func->getNextInstNumber() - FirstNumber;
68 // When a node is created, the OutEdges are immediately known, but the InEdges
69 // have to be built up incrementally. After the CFG has been constructed, the
70 // computePredecessors() pass finalizes it by creating the InEdges list.
71 void CfgNode::computePredecessors() {
72 for (CfgNode *Succ : OutEdges)
73 Succ->InEdges.push_back(this);
76 void CfgNode::computeSuccessors() {
77 OutEdges = Insts.rbegin()->getTerminatorEdges();
80 // Validate each Phi instruction in the node with respect to control flow. For
81 // every phi argument, its label must appear in the predecessor list. For each
82 // predecessor, there must be a phi argument with that label. We don't check
83 // that phi arguments with the same label have the same value.
84 void CfgNode::validatePhis() {
85 for (Inst &Instr : Phis) {
86 auto *Phi = llvm::cast<InstPhi>(&Instr);
87 // We do a simple O(N^2) algorithm to check for consistency. Even so, it
88 // shows up as only about 0.2% of the total translation time. But if
89 // necessary, we could improve the complexity by using a hash table to
90 // count how many times each node is referenced in the Phi instruction, and
91 // how many times each node is referenced in the incoming edge list, and
92 // compare the two for equality.
93 for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
94 CfgNode *Label = Phi->getLabel(i);
96 for (CfgNode *InNode : getInEdges()) {
97 if (InNode == Label) {
103 llvm::report_fatal_error("Phi error: label for bad incoming edge");
105 for (CfgNode *InNode : getInEdges()) {
107 for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
108 CfgNode *Label = Phi->getLabel(i);
109 if (InNode == Label) {
115 llvm::report_fatal_error("Phi error: missing label for incoming edge");
120 // This does part 1 of Phi lowering, by creating a new dest variable for each
121 // Phi instruction, replacing the Phi instruction's dest with that variable,
122 // and adding an explicit assignment of the old dest to the new dest. For
126 // "a_phi=phi(...); a=a_phi".
128 // This is in preparation for part 2 which deletes the Phi instructions and
129 // appends assignment instructions to predecessor blocks. Note that this
130 // transformation preserves SSA form.
131 void CfgNode::placePhiLoads() {
132 for (Inst &I : Phis) {
133 auto *Phi = llvm::dyn_cast<InstPhi>(&I);
134 Insts.insert(Insts.begin(), Phi->lower(Func));
138 // This does part 2 of Phi lowering. For each Phi instruction at each out-edge,
139 // create a corresponding assignment instruction, and add all the assignments
140 // near the end of this block. They need to be added before any branch
141 // instruction, and also if the block ends with a compare instruction followed
142 // by a branch instruction that we may want to fuse, it's better to insert the
143 // new assignments before the compare instruction. The
144 // tryOptimizedCmpxchgCmpBr() method assumes this ordering of instructions.
146 // Note that this transformation takes the Phi dest variables out of SSA form,
147 // as there may be assignments to the dest variable in multiple blocks.
148 void CfgNode::placePhiStores() {
149 // Find the insertion point.
150 InstList::iterator InsertionPoint = Insts.end();
151 // Every block must end in a terminator instruction, and therefore must have
152 // at least one instruction, so it's valid to decrement InsertionPoint (but
153 // assert just in case).
154 assert(InsertionPoint != Insts.begin());
156 // Confirm that InsertionPoint is a terminator instruction. Calling
157 // getTerminatorEdges() on a non-terminator instruction will cause an
158 // llvm_unreachable().
159 (void)InsertionPoint->getTerminatorEdges();
160 // SafeInsertionPoint is always immediately before the terminator
161 // instruction. If the block ends in a compare and conditional branch, it's
162 // better to place the Phi store before the compare so as not to interfere
163 // with compare/branch fusing. However, if the compare instruction's dest
164 // operand is the same as the new assignment statement's source operand, this
165 // can't be done due to data dependences, so we need to fall back to the
166 // SafeInsertionPoint. To illustrate:
168 // %97 = load i8* %96, align 1
169 // %98 = icmp ne i8 %97, 0
170 // br i1 %98, label %99, label %2132
172 // %100 = phi i8 [ %97, %95 ], [ %110, %108 ]
173 // %101 = phi i1 [ %98, %95 ], [ %111, %108 ]
174 // would be Phi-lowered as:
176 // %97 = load i8* %96, align 1
177 // %100_phi = %97 ; can be at InsertionPoint
178 // %98 = icmp ne i8 %97, 0
179 // %101_phi = %98 ; must be at SafeInsertionPoint
180 // br i1 %98, label %99, label %2132
185 // TODO(stichnot): It may be possible to bypass this whole SafeInsertionPoint
186 // mechanism. If a source basic block ends in a conditional branch:
189 // br i1 %foo, label %labelTrue, label %labelFalse
190 // and a branch target has a Phi involving the branch operand:
192 // %bar = phi i1 [ %foo, %labelSource ], ...
193 // then we actually know the constant i1 value of the Phi operand:
195 // %bar = phi i1 [ true, %labelSource ], ...
196 // It seems that this optimization should be done by clang or opt, but we
197 // could also do it here.
198 InstList::iterator SafeInsertionPoint = InsertionPoint;
199 // Keep track of the dest variable of a compare instruction, so that we
200 // insert the new instruction at the SafeInsertionPoint if the compare's dest
201 // matches the Phi-lowered assignment's source.
202 Variable *CmpInstDest = nullptr;
203 // If the current insertion point is at a conditional branch instruction, and
204 // the previous instruction is a compare instruction, then we move the
205 // insertion point before the compare instruction so as not to interfere with
206 // compare/branch fusing.
207 if (auto *Branch = llvm::dyn_cast<InstBr>(InsertionPoint)) {
208 if (!Branch->isUnconditional()) {
209 if (InsertionPoint != Insts.begin()) {
211 if (llvm::isa<InstIcmp>(InsertionPoint) ||
212 llvm::isa<InstFcmp>(InsertionPoint)) {
213 CmpInstDest = InsertionPoint->getDest();
221 // Consider every out-edge.
222 for (CfgNode *Succ : OutEdges) {
223 // Consider every Phi instruction at the out-edge.
224 for (Inst &I : Succ->Phis) {
225 auto *Phi = llvm::dyn_cast<InstPhi>(&I);
226 Operand *Operand = Phi->getOperandForTarget(this);
228 Variable *Dest = I.getDest();
230 auto *NewInst = InstAssign::create(Func, Dest, Operand);
231 if (CmpInstDest == Operand)
232 Insts.insert(SafeInsertionPoint, NewInst);
234 Insts.insert(InsertionPoint, NewInst);
239 // Deletes the phi instructions after the loads and stores are placed.
240 void CfgNode::deletePhis() {
245 // Splits the edge from Pred to this node by creating a new node and hooking up
246 // the in and out edges appropriately. (The EdgeIndex parameter is only used to
247 // make the new node's name unique when there are multiple edges between the
248 // same pair of nodes.) The new node's instruction list is initialized to the
249 // empty list, with no terminator instruction. There must not be multiple edges
250 // from Pred to this node so all Inst::getTerminatorEdges implementations must
251 // not contain duplicates.
252 CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) {
253 CfgNode *NewNode = Func->makeNode();
254 // Depth is the minimum as it works if both are the same, but if one is
255 // outside the loop and the other is inside, the new node should be placed
256 // outside and not be executed multiple times within the loop.
257 NewNode->setLoopNestDepth(
258 std::min(getLoopNestDepth(), Pred->getLoopNestDepth()));
259 if (BuildDefs::dump())
260 NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" +
261 std::to_string(EdgeIndex));
262 // The new node is added to the end of the node list, and will later need to
263 // be sorted into a reasonable topological order.
264 NewNode->setNeedsPlacement(true);
265 // Repoint Pred's out-edge.
267 for (CfgNode *&I : Pred->OutEdges) {
270 NewNode->InEdges.push_back(Pred);
277 // Repoint this node's in-edge.
279 for (CfgNode *&I : InEdges) {
282 NewNode->OutEdges.push_back(this);
289 // Repoint all suitable branch instructions' target and return.
291 for (Inst &I : Pred->getInsts())
292 if (!I.isDeleted() && I.repointEdges(this, NewNode))
301 // Helpers for advancedPhiLowering().
305 PhiDesc(const PhiDesc &) = delete;
306 PhiDesc &operator=(const PhiDesc &) = delete;
309 PhiDesc(InstPhi *Phi, Variable *Dest) : Phi(Phi), Dest(Dest) {}
310 PhiDesc(PhiDesc &&) = default;
311 InstPhi *Phi = nullptr;
312 Variable *Dest = nullptr;
313 Operand *Src = nullptr;
314 bool Processed = false;
315 size_t NumPred = 0; // number of entries whose Src is this Dest
316 int32_t Weight = 0; // preference for topological order
318 using PhiDescList = llvm::SmallVector<PhiDesc, 32>;
320 // Always pick NumPred=0 over NumPred>0.
321 constexpr int32_t WeightNoPreds = 4;
322 // Prefer Src as a register because the register might free up.
323 constexpr int32_t WeightSrcIsReg = 2;
324 // Prefer Dest not as a register because the register stays free longer.
325 constexpr int32_t WeightDestNotReg = 1;
327 bool sameVarOrReg(TargetLowering *Target, const Variable *Var1,
328 const Operand *Opnd) {
331 const auto *Var2 = llvm::dyn_cast<Variable>(Opnd);
335 // If either operand lacks a register, they cannot be the same.
341 int32_t RegNum1 = Var1->getRegNum();
342 int32_t RegNum2 = Var2->getRegNum();
343 // Quick common-case check.
344 if (RegNum1 == RegNum2)
347 assert(Target->getAliasesForRegister(RegNum1)[RegNum2] ==
348 Target->getAliasesForRegister(RegNum2)[RegNum1]);
349 return Target->getAliasesForRegister(RegNum1)[RegNum2];
352 // Update NumPred for all Phi assignments using Var as their Dest variable.
353 // Also update Weight if NumPred dropped from 1 to 0.
354 void updatePreds(PhiDescList &Desc, TargetLowering *Target, Variable *Var) {
355 for (PhiDesc &Item : Desc) {
356 if (!Item.Processed && sameVarOrReg(Target, Var, Item.Dest)) {
357 if (--Item.NumPred == 0) {
358 Item.Weight += WeightNoPreds;
364 } // end of anonymous namespace
366 // This the "advanced" version of Phi lowering for a basic block, in contrast
367 // to the simple version that lowers through assignments involving temporaries.
369 // All Phi instructions in a basic block are conceptually executed in parallel.
370 // However, if we lower Phis early and commit to a sequential ordering, we may
371 // end up creating unnecessary interferences which lead to worse register
372 // allocation. Delaying Phi scheduling until after register allocation can help
373 // unless there are no free registers for shuffling registers or stack slots
374 // and spilling becomes necessary.
376 // The advanced Phi lowering starts by finding a topological sort of the Phi
377 // instructions, where "A=B" comes before "B=C" due to the anti-dependence on
378 // B. Preexisting register assignments are considered in the topological sort.
379 // If a topological sort is not possible due to a cycle, the cycle is broken by
380 // introducing a non-parallel temporary. For example, a cycle arising from a
381 // permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being
382 // equal, prefer to schedule assignments with register-allocated Src operands
383 // earlier, in case that register becomes free afterwards, and prefer to
384 // schedule assignments with register-allocated Dest variables later, to keep
385 // that register free for longer.
387 // Once the ordering is determined, the Cfg edge is split and the assignment
388 // list is lowered by the target lowering layer. Since the assignment lowering
389 // may create new infinite-weight temporaries, a follow-on register allocation
390 // pass will be needed. To prepare for this, liveness (including live range
391 // calculation) of the split nodes needs to be calculated, and liveness of the
392 // original node need to be updated to "undo" the effects of the phi
395 // The specific placement of the new node within the Cfg node list is deferred
396 // until later, including after empty node contraction.
398 // After phi assignments are lowered across all blocks, another register
399 // allocation pass is run, focusing only on pre-colored and infinite-weight
400 // variables, similar to Om1 register allocation (except without the need to
401 // specially compute these variables' live ranges, since they have already been
402 // precisely calculated). The register allocator in this mode needs the ability
403 // to forcibly spill and reload registers in case none are naturally available.
404 void CfgNode::advancedPhiLowering() {
405 if (getPhis().empty())
410 for (Inst &I : Phis) {
411 auto *Phi = llvm::dyn_cast<InstPhi>(&I);
412 if (!Phi->isDeleted()) {
413 Variable *Dest = Phi->getDest();
414 Desc.emplace_back(Phi, Dest);
415 // Undo the effect of the phi instruction on this node's live-in set by
416 // marking the phi dest variable as live on entry.
417 SizeT VarNum = Func->getLiveness()->getLiveIndex(Dest->getIndex());
418 assert(!Func->getLiveness()->getLiveIn(this)[VarNum]);
419 Func->getLiveness()->getLiveIn(this)[VarNum] = true;
426 TargetLowering *Target = Func->getTarget();
427 SizeT InEdgeIndex = 0;
428 for (CfgNode *Pred : InEdges) {
429 CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++);
430 SizeT Remaining = Desc.size();
432 // First pass computes Src and initializes NumPred.
433 for (PhiDesc &Item : Desc) {
434 Variable *Dest = Item.Dest;
435 Operand *Src = Item.Phi->getOperandForTarget(Pred);
437 Item.Processed = false;
439 // Cherry-pick any trivial assignments, so that they don't contribute to
440 // the running complexity of the topological sort.
441 if (sameVarOrReg(Target, Dest, Src)) {
442 Item.Processed = true;
445 // If Dest and Src are syntactically the same, don't bother adding
446 // the assignment, because in all respects it would be redundant, and
447 // if Dest/Src are on the stack, the target lowering may naively
448 // decide to lower it using a temporary register.
449 Split->appendInst(InstAssign::create(Func, Dest, Src));
452 // Second pass computes NumPred by comparing every pair of Phi instructions.
453 for (PhiDesc &Item : Desc) {
456 const Variable *Dest = Item.Dest;
457 for (PhiDesc &Item2 : Desc) {
460 // There shouldn't be two different Phis with the same Dest variable or
462 assert((&Item == &Item2) || !sameVarOrReg(Target, Dest, Item2.Dest));
463 if (sameVarOrReg(Target, Dest, Item2.Src))
468 // Another pass to compute initial Weight values.
469 for (PhiDesc &Item : Desc) {
473 if (Item.NumPred == 0)
474 Weight += WeightNoPreds;
475 if (auto *Var = llvm::dyn_cast<Variable>(Item.Src))
477 Weight += WeightSrcIsReg;
478 if (!Item.Dest->hasReg())
479 Weight += WeightDestNotReg;
480 Item.Weight = Weight;
483 // Repeatedly choose and process the best candidate in the topological sort,
484 // until no candidates remain. This implementation is O(N^2) where N is the
485 // number of Phi instructions, but with a small constant factor compared to
486 // a likely implementation of O(N) topological sort.
487 for (; Remaining; --Remaining) {
488 int32_t BestWeight = -1;
489 PhiDesc *BestItem = nullptr;
490 // Find the best candidate.
491 for (PhiDesc &Item : Desc) {
495 Weight = Item.Weight;
496 if (Weight > BestWeight) {
501 assert(BestWeight >= 0);
502 assert(BestItem->NumPred <= 1);
503 Variable *Dest = BestItem->Dest;
504 Operand *Src = BestItem->Src;
505 assert(!sameVarOrReg(Target, Dest, Src));
506 // Break a cycle by introducing a temporary.
507 if (BestItem->NumPred) {
509 // If the target instruction "A=B" is part of a cycle, find the "X=A"
510 // assignment in the cycle because it will have to be rewritten as
512 for (PhiDesc &Item : Desc) {
515 Operand *OtherSrc = Item.Src;
516 if (Item.NumPred && sameVarOrReg(Target, Dest, OtherSrc)) {
517 SizeT VarNum = Func->getNumVariables();
518 Variable *Tmp = Func->makeVariable(OtherSrc->getType());
519 if (BuildDefs::dump())
520 Tmp->setName(Func, "__split_" + std::to_string(VarNum));
521 Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc));
523 updatePreds(Desc, Target, llvm::cast<Variable>(OtherSrc));
531 // Now that a cycle (if any) has been broken, create the actual
533 Split->appendInst(InstAssign::create(Func, Dest, Src));
534 if (auto *Var = llvm::dyn_cast<Variable>(Src))
535 updatePreds(Desc, Target, Var);
536 BestItem->Processed = true;
538 Split->appendInst(InstBr::create(Func, this));
541 Func->getVMetadata()->addNode(Split);
542 // Validate to be safe. All items should be marked as processed, and have
544 if (BuildDefs::asserts()) {
545 for (PhiDesc &Item : Desc) {
547 assert(Item.Processed);
548 assert(Item.NumPred == 0);
554 // Does address mode optimization. Pass each instruction to the TargetLowering
555 // object. If it returns a new instruction (representing the optimized address
556 // mode), then insert the new instruction and delete the old.
557 void CfgNode::doAddressOpt() {
558 TargetLowering *Target = Func->getTarget();
559 LoweringContext &Context = Target->getContext();
561 while (!Context.atEnd()) {
562 Target->doAddressOpt();
566 void CfgNode::doNopInsertion(RandomNumberGenerator &RNG) {
567 TargetLowering *Target = Func->getTarget();
568 LoweringContext &Context = Target->getContext();
570 Context.setInsertPoint(Context.getCur());
571 // Do not insert nop in bundle locked instructions.
572 bool PauseNopInsertion = false;
573 while (!Context.atEnd()) {
574 if (llvm::isa<InstBundleLock>(Context.getCur())) {
575 PauseNopInsertion = true;
576 } else if (llvm::isa<InstBundleUnlock>(Context.getCur())) {
577 PauseNopInsertion = false;
579 if (!PauseNopInsertion)
580 Target->doNopInsertion(RNG);
581 // Ensure Cur=Next, so that the nops are inserted before the current
582 // instruction rather than after.
583 Context.advanceCur();
584 Context.advanceNext();
588 // Drives the target lowering. Passes the current instruction and the next
589 // non-deleted instruction for target lowering.
590 void CfgNode::genCode() {
591 TargetLowering *Target = Func->getTarget();
592 LoweringContext &Context = Target->getContext();
593 // Lower the regular instructions.
595 Target->initNodeForLowering(this);
596 while (!Context.atEnd()) {
597 InstList::iterator Orig = Context.getCur();
598 if (llvm::isa<InstRet>(*Orig))
601 // Ensure target lowering actually moved the cursor.
602 assert(Context.getCur() != Orig);
604 Context.availabilityReset();
605 // Do preliminary lowering of the Phi instructions.
606 Target->prelowerPhis();
609 void CfgNode::livenessLightweight() {
610 SizeT NumVars = Func->getNumVariables();
611 LivenessBV Live(NumVars);
612 // Process regular instructions in reverse order.
613 for (Inst &I : reverse_range(Insts)) {
616 I.livenessLightweight(Func, Live);
618 for (Inst &I : Phis) {
621 I.livenessLightweight(Func, Live);
625 // Performs liveness analysis on the block. Returns true if the incoming
626 // liveness changed from before, false if it stayed the same. (If it changes,
627 // the node's predecessors need to be processed again.)
628 bool CfgNode::liveness(Liveness *Liveness) {
629 SizeT NumVars = Liveness->getNumVarsInNode(this);
630 LivenessBV Live(NumVars);
631 LiveBeginEndMap *LiveBegin = nullptr;
632 LiveBeginEndMap *LiveEnd = nullptr;
633 // Mark the beginning and ending of each variable's live range with the
634 // sentinel instruction number 0.
635 if (Liveness->getMode() == Liveness_Intervals) {
636 LiveBegin = Liveness->getLiveBegin(this);
637 LiveEnd = Liveness->getLiveEnd(this);
640 // Guess that the number of live ranges beginning is roughly the number of
641 // instructions, and same for live ranges ending.
642 LiveBegin->reserve(getInstCountEstimate());
643 LiveEnd->reserve(getInstCountEstimate());
645 // Initialize Live to be the union of all successors' LiveIn.
646 for (CfgNode *Succ : OutEdges) {
647 Live |= Liveness->getLiveIn(Succ);
648 // Mark corresponding argument of phis in successor as live.
649 for (Inst &I : Succ->Phis) {
652 auto *Phi = llvm::dyn_cast<InstPhi>(&I);
653 Phi->livenessPhiOperand(Live, this, Liveness);
656 Liveness->getLiveOut(this) = Live;
658 // Process regular instructions in reverse order.
659 for (Inst &I : reverse_range(Insts)) {
662 I.liveness(I.getNumber(), Live, Liveness, LiveBegin, LiveEnd);
664 // Process phis in forward order so that we can override the instruction
665 // number to be that of the earliest phi instruction in the block.
666 SizeT NumNonDeadPhis = 0;
667 InstNumberT FirstPhiNumber = Inst::NumberSentinel;
668 for (Inst &I : Phis) {
671 if (FirstPhiNumber == Inst::NumberSentinel)
672 FirstPhiNumber = I.getNumber();
673 if (I.liveness(FirstPhiNumber, Live, Liveness, LiveBegin, LiveEnd))
677 // When using the sparse representation, after traversing the instructions in
678 // the block, the Live bitvector should only contain set bits for global
679 // variables upon block entry. We validate this by shrinking the Live vector
680 // and then testing it against the pre-shrunk version. (The shrinking is
681 // required, but the validation is not.)
682 LivenessBV LiveOrig = Live;
683 Live.resize(Liveness->getNumGlobalVars());
684 if (Live != LiveOrig) {
685 if (BuildDefs::dump()) {
686 // This is a fatal liveness consistency error. Print some diagnostics and
688 Ostream &Str = Func->getContext()->getStrDump();
689 Func->resetCurrentNode();
690 Str << "LiveOrig-Live =";
691 for (SizeT i = Live.size(); i < LiveOrig.size(); ++i) {
692 if (LiveOrig.test(i)) {
694 Liveness->getVariable(i, this)->dump(Func);
699 llvm::report_fatal_error("Fatal inconsistency in liveness analysis");
702 bool Changed = false;
703 LivenessBV &LiveIn = Liveness->getLiveIn(this);
704 // Add in current LiveIn
706 // Check result, set LiveIn=Live
707 SizeT &PrevNumNonDeadPhis = Liveness->getNumNonDeadPhis(this);
708 bool LiveInChanged = (Live != LiveIn);
709 Changed = (NumNonDeadPhis != PrevNumNonDeadPhis || LiveInChanged);
712 PrevNumNonDeadPhis = NumNonDeadPhis;
716 // Validate the integrity of the live ranges in this block. If there are any
717 // errors, it prints details and returns false. On success, it returns true.
718 bool CfgNode::livenessValidateIntervals(Liveness *Liveness) const {
719 if (!BuildDefs::asserts())
722 // Verify there are no duplicates.
724 [](const LiveBeginEndMapEntry &A, const LiveBeginEndMapEntry &B) {
725 return A.first == B.first;
727 LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this);
728 LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this);
729 if (std::adjacent_find(MapBegin.begin(), MapBegin.end(), ComparePair) ==
731 std::adjacent_find(MapEnd.begin(), MapEnd.end(), ComparePair) ==
735 // There is definitely a liveness error. All paths from here return false.
736 if (!BuildDefs::dump())
739 // Print all the errors.
740 if (BuildDefs::dump()) {
741 GlobalContext *Ctx = Func->getContext();
742 OstreamLocker L(Ctx);
743 Ostream &Str = Ctx->getStrDump();
744 if (Func->isVerbose()) {
745 Str << "Live range errors in the following block:\n";
748 for (auto Start = MapBegin.begin();
749 (Start = std::adjacent_find(Start, MapBegin.end(), ComparePair)) !=
752 auto Next = Start + 1;
753 Str << "Duplicate LR begin, block " << getName() << ", instructions "
754 << Start->second << " & " << Next->second << ", variable "
755 << Liveness->getVariable(Start->first, this)->getName(Func) << "\n";
757 for (auto Start = MapEnd.begin();
758 (Start = std::adjacent_find(Start, MapEnd.end(), ComparePair)) !=
761 auto Next = Start + 1;
762 Str << "Duplicate LR end, block " << getName() << ", instructions "
763 << Start->second << " & " << Next->second << ", variable "
764 << Liveness->getVariable(Start->first, this)->getName(Func) << "\n";
771 // Once basic liveness is complete, compute actual live ranges. It is assumed
772 // that within a single basic block, a live range begins at most once and ends
773 // at most once. This is certainly true for pure SSA form. It is also true once
774 // phis are lowered, since each assignment to the phi-based temporary is in a
775 // different basic block, and there is a single read that ends the live in the
776 // basic block that contained the actual phi instruction.
777 void CfgNode::livenessAddIntervals(Liveness *Liveness, InstNumberT FirstInstNum,
778 InstNumberT LastInstNum) {
779 TimerMarker T1(TimerStack::TT_liveRange, Func);
781 SizeT NumVars = Liveness->getNumVarsInNode(this);
782 LivenessBV &LiveIn = Liveness->getLiveIn(this);
783 LivenessBV &LiveOut = Liveness->getLiveOut(this);
784 LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this);
785 LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this);
786 std::sort(MapBegin.begin(), MapBegin.end());
787 std::sort(MapEnd.begin(), MapEnd.end());
789 if (!livenessValidateIntervals(Liveness)) {
790 llvm::report_fatal_error("livenessAddIntervals: Liveness error");
794 LivenessBV LiveInAndOut = LiveIn;
795 LiveInAndOut &= LiveOut;
797 // Iterate in parallel across the sorted MapBegin[] and MapEnd[].
798 auto IBB = MapBegin.begin(), IEB = MapEnd.begin();
799 auto IBE = MapBegin.end(), IEE = MapEnd.end();
800 while (IBB != IBE || IEB != IEE) {
801 SizeT i1 = IBB == IBE ? NumVars : IBB->first;
802 SizeT i2 = IEB == IEE ? NumVars : IEB->first;
803 SizeT i = std::min(i1, i2);
804 // i1 is the Variable number of the next MapBegin entry, and i2 is the
805 // Variable number of the next MapEnd entry. If i1==i2, then the Variable's
806 // live range begins and ends in this block. If i1<i2, then i1's live range
807 // begins at instruction IBB->second and extends through the end of the
808 // block. If i1>i2, then i2's live range begins at the first instruction of
809 // the block and ends at IEB->second. In any case, we choose the lesser of
810 // i1 and i2 and proceed accordingly.
811 InstNumberT LB = i == i1 ? IBB->second : FirstInstNum;
812 InstNumberT LE = i == i2 ? IEB->second : LastInstNum + 1;
814 Variable *Var = Liveness->getVariable(i, this);
816 Var->addLiveRange(FirstInstNum, LE);
817 Var->addLiveRange(LB, LastInstNum + 1);
818 // Assert that Var is a global variable by checking that its liveness
819 // index is less than the number of globals. This ensures that the
820 // LiveInAndOut[] access is valid.
821 assert(i < Liveness->getNumGlobalVars());
822 LiveInAndOut[i] = false;
824 Var->addLiveRange(LB, LE);
831 // Process the variables that are live across the entire block.
832 for (int i = LiveInAndOut.find_first(); i != -1;
833 i = LiveInAndOut.find_next(i)) {
834 Variable *Var = Liveness->getVariable(i, this);
835 if (Liveness->getRangeMask(Var->getIndex()))
836 Var->addLiveRange(FirstInstNum, LastInstNum + 1);
840 // If this node contains only deleted instructions, and ends in an
841 // unconditional branch, contract the node by repointing all its in-edges to
843 void CfgNode::contractIfEmpty() {
846 Inst *Branch = nullptr;
847 for (Inst &I : Insts) {
850 if (I.isUnconditionalBranch())
852 else if (!I.isRedundantAssign())
855 // Make sure there is actually a successor to repoint in-edges to.
856 if (OutEdges.empty())
858 assert(OutEdges.size() == 1);
859 // Don't try to delete a self-loop.
860 if (OutEdges[0] == this)
862 // Make sure the node actually contains (ends with) an unconditional branch.
863 if (Branch == nullptr)
866 Branch->setDeleted();
867 CfgNode *Successor = OutEdges.front();
868 // Repoint all this node's in-edges to this node's successor, unless this
869 // node's successor is actually itself (in which case the statement
870 // "OutEdges.front()->InEdges.push_back(Pred)" could invalidate the iterator
871 // over this->InEdges).
872 if (Successor != this) {
873 for (CfgNode *Pred : InEdges) {
874 for (CfgNode *&I : Pred->OutEdges) {
877 Successor->InEdges.push_back(Pred);
880 for (Inst &I : Pred->getInsts()) {
882 I.repointEdges(this, Successor);
886 // Remove the in-edge to the successor to allow node reordering to make
887 // better decisions. For example it's more helpful to place a node after a
888 // reachable predecessor than an unreachable one (like the one we just
890 Successor->InEdges.erase(
891 std::find(Successor->InEdges.begin(), Successor->InEdges.end(), this));
896 void CfgNode::doBranchOpt(const CfgNode *NextNode) {
897 TargetLowering *Target = Func->getTarget();
898 // Find the first opportunity for branch optimization (which will be the last
899 // instruction in the block) and stop. This is sufficient unless there is
900 // some target lowering where we have the possibility of multiple
901 // optimizations per block. Take care with switch lowering as there are
902 // multiple unconditional branches and only the last can be deleted.
903 for (Inst &I : reverse_range(Insts)) {
904 if (!I.isDeleted()) {
905 Target->doBranchOpt(&I, NextNode);
911 // ======================== Dump routines ======================== //
915 // Helper functions for emit().
917 void emitRegisterUsage(Ostream &Str, const Cfg *Func, const CfgNode *Node,
918 bool IsLiveIn, CfgVector<SizeT> &LiveRegCount) {
919 if (!BuildDefs::dump())
921 Liveness *Liveness = Func->getLiveness();
922 const LivenessBV *Live;
923 const int32_t StackReg = Func->getTarget()->getStackReg();
924 const int32_t FrameOrStackReg = Func->getTarget()->getFrameOrStackReg();
926 Live = &Liveness->getLiveIn(Node);
927 Str << "\t\t\t\t/* LiveIn=";
929 Live = &Liveness->getLiveOut(Node);
930 Str << "\t\t\t\t/* LiveOut=";
932 if (!Live->empty()) {
933 CfgVector<Variable *> LiveRegs;
934 for (SizeT i = 0; i < Live->size(); ++i) {
937 Variable *Var = Liveness->getVariable(i, Node);
940 const int32_t RegNum = Var->getRegNum();
941 if (RegNum == StackReg || RegNum == FrameOrStackReg)
944 ++LiveRegCount[RegNum];
945 LiveRegs.push_back(Var);
947 // Sort the variables by regnum so they are always printed in a familiar
949 std::sort(LiveRegs.begin(), LiveRegs.end(),
950 [](const Variable *V1, const Variable *V2) {
951 return V1->getRegNum() < V2->getRegNum();
954 for (Variable *Var : LiveRegs) {
964 /// Returns true if some text was emitted - in which case the caller definitely
965 /// needs to emit a newline character.
966 bool emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr,
967 CfgVector<SizeT> &LiveRegCount) {
968 bool Printed = false;
969 if (!BuildDefs::dump())
971 Variable *Dest = Instr->getDest();
972 // Normally we increment the live count for the dest register. But we
973 // shouldn't if the instruction's IsDestRedefined flag is set, because this
974 // means that the target lowering created this instruction as a non-SSA
975 // assignment; i.e., a different, previous instruction started the dest
976 // variable's live range.
977 if (!Instr->isDestRedefined() && Dest && Dest->hasReg())
978 ++LiveRegCount[Dest->getRegNum()];
979 FOREACH_VAR_IN_INST(Var, *Instr) {
980 bool ShouldReport = Instr->isLastUse(Var);
981 if (ShouldReport && Var->hasReg()) {
982 // Don't report end of live range until the live count reaches 0.
983 SizeT NewCount = --LiveRegCount[Var->getRegNum()];
985 ShouldReport = false;
1001 void updateStats(Cfg *Func, const Inst *I) {
1002 if (!BuildDefs::dump())
1004 // Update emitted instruction count, plus fill/spill count for Variable
1005 // operands without a physical register.
1006 if (uint32_t Count = I->getEmitInstCount()) {
1007 Func->getContext()->statsUpdateEmitted(Count);
1008 if (Variable *Dest = I->getDest()) {
1009 if (!Dest->hasReg())
1010 Func->getContext()->statsUpdateFills();
1012 for (SizeT S = 0; S < I->getSrcSize(); ++S) {
1013 if (auto *Src = llvm::dyn_cast<Variable>(I->getSrc(S))) {
1015 Func->getContext()->statsUpdateSpills();
1021 } // end of anonymous namespace
1023 void CfgNode::emit(Cfg *Func) const {
1024 if (!BuildDefs::dump())
1026 Func->setCurrentNode(this);
1027 Ostream &Str = Func->getContext()->getStrEmit();
1028 Liveness *Liveness = Func->getLiveness();
1029 const bool DecorateAsm =
1030 Liveness && Func->getContext()->getFlags().getDecorateAsm();
1031 Str << getAsmName() << ":\n";
1032 // LiveRegCount keeps track of the number of currently live variables that
1033 // each register is assigned to. Normally that would be only 0 or 1, but the
1034 // register allocator's AllowOverlap inference allows it to be greater than 1
1035 // for short periods.
1036 CfgVector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters());
1038 constexpr bool IsLiveIn = true;
1039 emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
1040 if (getInEdges().size()) {
1041 Str << "\t\t\t\t/* preds=";
1043 for (CfgNode *I : getInEdges()) {
1047 Str << "$" << I->getName();
1051 if (getLoopNestDepth()) {
1052 Str << "\t\t\t\t/* loop depth=" << getLoopNestDepth() << " */\n";
1056 for (const Inst &I : Phis) {
1059 // Emitting a Phi instruction should cause an error.
1062 for (const Inst &I : Insts) {
1065 if (I.isRedundantAssign()) {
1066 // Usually, redundant assignments end the live range of the src variable
1067 // and begin the live range of the dest variable, with no net effect on
1068 // the liveness of their register. However, if the register allocator
1069 // infers the AllowOverlap condition, then this may be a redundant
1070 // assignment that does not end the src variable's live range, in which
1071 // case the active variable count for that register needs to be bumped.
1072 // That normally would have happened as part of emitLiveRangesEnded(),
1073 // but that isn't called for redundant assignments.
1074 Variable *Dest = I.getDest();
1075 if (DecorateAsm && Dest->hasReg()) {
1076 ++LiveRegCount[Dest->getRegNum()];
1077 if (I.isLastUse(I.getSrc(0)))
1078 --LiveRegCount[llvm::cast<Variable>(I.getSrc(0))->getRegNum()];
1083 bool Printed = false;
1085 Printed = emitLiveRangesEnded(Str, Func, &I, LiveRegCount);
1086 if (Printed || llvm::isa<InstTarget>(&I))
1088 updateStats(Func, &I);
1091 constexpr bool IsLiveIn = false;
1092 emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
1096 // Helper class for emitIAS().
1098 class BundleEmitHelper {
1099 BundleEmitHelper() = delete;
1100 BundleEmitHelper(const BundleEmitHelper &) = delete;
1101 BundleEmitHelper &operator=(const BundleEmitHelper &) = delete;
1104 BundleEmitHelper(Assembler *Asm, const InstList &Insts)
1105 : Asm(Asm), End(Insts.end()), BundleLockStart(End),
1106 BundleSize(1 << Asm->getBundleAlignLog2Bytes()),
1107 BundleMaskLo(BundleSize - 1), BundleMaskHi(~BundleMaskLo) {}
1108 // Check whether we're currently within a bundle_lock region.
1109 bool isInBundleLockRegion() const { return BundleLockStart != End; }
1110 // Check whether the current bundle_lock region has the align_to_end option.
1111 bool isAlignToEnd() const {
1112 assert(isInBundleLockRegion());
1113 return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() ==
1114 InstBundleLock::Opt_AlignToEnd;
1116 bool isPadToEnd() const {
1117 assert(isInBundleLockRegion());
1118 return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() ==
1119 InstBundleLock::Opt_PadToEnd;
1121 // Check whether the entire bundle_lock region falls within the same bundle.
1122 bool isSameBundle() const {
1123 assert(isInBundleLockRegion());
1124 return SizeSnapshotPre == SizeSnapshotPost ||
1125 (SizeSnapshotPre & BundleMaskHi) ==
1126 ((SizeSnapshotPost - 1) & BundleMaskHi);
1128 // Get the bundle alignment of the first instruction of the bundle_lock
1130 intptr_t getPreAlignment() const {
1131 assert(isInBundleLockRegion());
1132 return SizeSnapshotPre & BundleMaskLo;
1134 // Get the bundle alignment of the first instruction past the bundle_lock
1136 intptr_t getPostAlignment() const {
1137 assert(isInBundleLockRegion());
1138 return SizeSnapshotPost & BundleMaskLo;
1140 // Get the iterator pointing to the bundle_lock instruction, e.g. to roll
1141 // back the instruction iteration to that point.
1142 InstList::const_iterator getBundleLockStart() const {
1143 assert(isInBundleLockRegion());
1144 return BundleLockStart;
1146 // Set up bookkeeping when the bundle_lock instruction is first processed.
1147 void enterBundleLock(InstList::const_iterator I) {
1148 assert(!isInBundleLockRegion());
1149 BundleLockStart = I;
1150 SizeSnapshotPre = Asm->getBufferSize();
1151 Asm->setPreliminary(true);
1152 assert(isInBundleLockRegion());
1154 // Update bookkeeping when the bundle_unlock instruction is processed.
1155 void enterBundleUnlock() {
1156 assert(isInBundleLockRegion());
1157 SizeSnapshotPost = Asm->getBufferSize();
1159 // Update bookkeeping when we are completely finished with the bundle_lock
1161 void leaveBundleLockRegion() { BundleLockStart = End; }
1162 // Check whether the instruction sequence fits within the current bundle, and
1163 // if not, add nop padding to the end of the current bundle.
1164 void padToNextBundle() {
1165 assert(isInBundleLockRegion());
1166 if (!isSameBundle()) {
1167 intptr_t PadToNextBundle = BundleSize - getPreAlignment();
1168 Asm->padWithNop(PadToNextBundle);
1169 SizeSnapshotPre += PadToNextBundle;
1170 SizeSnapshotPost += PadToNextBundle;
1171 assert((Asm->getBufferSize() & BundleMaskLo) == 0);
1172 assert(Asm->getBufferSize() == SizeSnapshotPre);
1175 // If align_to_end is specified, add padding such that the instruction
1176 // sequences ends precisely at a bundle boundary.
1177 void padForAlignToEnd() {
1178 assert(isInBundleLockRegion());
1179 if (isAlignToEnd()) {
1180 if (intptr_t Offset = getPostAlignment()) {
1181 Asm->padWithNop(BundleSize - Offset);
1182 SizeSnapshotPre = Asm->getBufferSize();
1186 // If pad_to_end is specified, add padding such that the first instruction
1187 // after the instruction sequence starts at a bundle boundary.
1188 void padForPadToEnd() {
1189 assert(isInBundleLockRegion());
1191 if (intptr_t Offset = getPostAlignment()) {
1192 Asm->padWithNop(BundleSize - Offset);
1193 SizeSnapshotPre = Asm->getBufferSize();
1196 } // Update bookkeeping when rolling back for the second pass.
1198 assert(isInBundleLockRegion());
1199 Asm->setBufferSize(SizeSnapshotPre);
1200 Asm->setPreliminary(false);
1204 Assembler *const Asm;
1205 // End is a sentinel value such that BundleLockStart==End implies that we are
1206 // not in a bundle_lock region.
1207 const InstList::const_iterator End;
1208 InstList::const_iterator BundleLockStart;
1209 const intptr_t BundleSize;
1210 // Masking with BundleMaskLo identifies an address's bundle offset.
1211 const intptr_t BundleMaskLo;
1212 // Masking with BundleMaskHi identifies an address's bundle.
1213 const intptr_t BundleMaskHi;
1214 intptr_t SizeSnapshotPre = 0;
1215 intptr_t SizeSnapshotPost = 0;
1218 } // end of anonymous namespace
1220 void CfgNode::emitIAS(Cfg *Func) const {
1221 Func->setCurrentNode(this);
1222 Assembler *Asm = Func->getAssembler<>();
1223 // TODO(stichnot): When sandboxing, defer binding the node label until just
1224 // before the first instruction is emitted, to reduce the chance that a
1225 // padding nop is a branch target.
1226 Asm->bindCfgNodeLabel(this);
1227 for (const Inst &I : Phis) {
1230 // Emitting a Phi instruction should cause an error.
1234 // Do the simple emission if not sandboxed.
1235 if (!Func->getContext()->getFlags().getUseSandboxing()) {
1236 for (const Inst &I : Insts) {
1237 if (!I.isDeleted() && !I.isRedundantAssign()) {
1239 updateStats(Func, &I);
1245 // The remainder of the function handles emission with sandboxing. There are
1246 // explicit bundle_lock regions delimited by bundle_lock and bundle_unlock
1247 // instructions. All other instructions are treated as an implicit
1248 // one-instruction bundle_lock region. Emission is done twice for each
1249 // bundle_lock region. The first pass is a preliminary pass, after which we
1250 // can figure out what nop padding is needed, then roll back, and make the
1253 // Ideally, the first pass would be speculative and the second pass would
1254 // only be done if nop padding were needed, but the structure of the
1255 // integrated assembler makes it hard to roll back the state of label
1256 // bindings, label links, and relocation fixups. Instead, the first pass just
1257 // disables all mutation of that state.
1259 BundleEmitHelper Helper(Asm, Insts);
1260 InstList::const_iterator End = Insts.end();
1261 // Retrying indicates that we had to roll back to the bundle_lock instruction
1262 // to apply padding before the bundle_lock sequence.
1263 bool Retrying = false;
1264 for (InstList::const_iterator I = Insts.begin(); I != End; ++I) {
1265 if (I->isDeleted() || I->isRedundantAssign())
1268 if (llvm::isa<InstBundleLock>(I)) {
1269 // Set up the initial bundle_lock state. This should not happen while
1270 // retrying, because the retry rolls back to the instruction following
1271 // the bundle_lock instruction.
1273 Helper.enterBundleLock(I);
1277 if (llvm::isa<InstBundleUnlock>(I)) {
1278 Helper.enterBundleUnlock();
1280 // Make sure all instructions are in the same bundle.
1281 assert(Helper.isSameBundle());
1282 // If align_to_end is specified, make sure the next instruction begins
1284 assert(!Helper.isAlignToEnd() || Helper.getPostAlignment() == 0);
1285 Helper.padForPadToEnd();
1286 Helper.leaveBundleLockRegion();
1289 // This is the first pass, so roll back for the retry pass.
1291 // Pad to the next bundle if the instruction sequence crossed a bundle
1293 Helper.padToNextBundle();
1294 // Insert additional padding to make AlignToEnd work.
1295 Helper.padForAlignToEnd();
1296 // Prepare for the retry pass after padding is done.
1298 I = Helper.getBundleLockStart();
1303 // I points to a non bundle_lock/bundle_unlock instruction.
1304 if (Helper.isInBundleLockRegion()) {
1306 // Only update stats during the final pass.
1308 updateStats(Func, I);
1310 // Treat it as though there were an implicit bundle_lock and
1311 // bundle_unlock wrapping the instruction.
1312 Helper.enterBundleLock(I);
1314 Helper.enterBundleUnlock();
1316 Helper.padToNextBundle();
1318 updateStats(Func, I);
1319 Helper.leaveBundleLockRegion();
1323 // Don't allow bundle locking across basic blocks, to keep the backtracking
1324 // mechanism simple.
1325 assert(!Helper.isInBundleLockRegion());
1329 void CfgNode::dump(Cfg *Func) const {
1330 if (!BuildDefs::dump())
1332 Func->setCurrentNode(this);
1333 Ostream &Str = Func->getContext()->getStrDump();
1334 Liveness *Liveness = Func->getLiveness();
1335 if (Func->isVerbose(IceV_Instructions) || Func->isVerbose(IceV_Loop))
1336 Str << getName() << ":\n";
1337 // Dump the loop nest depth
1338 if (Func->isVerbose(IceV_Loop))
1339 Str << " // LoopNestDepth = " << getLoopNestDepth() << "\n";
1340 // Dump list of predecessor nodes.
1341 if (Func->isVerbose(IceV_Preds) && !InEdges.empty()) {
1342 Str << " // preds = ";
1344 for (CfgNode *I : InEdges) {
1348 Str << "%" << I->getName();
1352 // Dump the live-in variables.
1355 LiveIn = Liveness->getLiveIn(this);
1356 if (Func->isVerbose(IceV_Liveness) && !LiveIn.empty()) {
1357 Str << " // LiveIn:";
1358 for (SizeT i = 0; i < LiveIn.size(); ++i) {
1360 Variable *Var = Liveness->getVariable(i, this);
1361 Str << " %" << Var->getName(Func);
1362 if (Func->isVerbose(IceV_RegOrigins) && Var->hasReg()) {
1364 << Func->getTarget()->getRegName(Var->getRegNum(),
1371 // Dump each instruction.
1372 if (Func->isVerbose(IceV_Instructions)) {
1373 for (const Inst &I : Phis)
1374 I.dumpDecorated(Func);
1375 for (const Inst &I : Insts)
1376 I.dumpDecorated(Func);
1378 // Dump the live-out variables.
1381 LiveOut = Liveness->getLiveOut(this);
1382 if (Func->isVerbose(IceV_Liveness) && !LiveOut.empty()) {
1383 Str << " // LiveOut:";
1384 for (SizeT i = 0; i < LiveOut.size(); ++i) {
1386 Variable *Var = Liveness->getVariable(i, this);
1387 Str << " %" << Var->getName(Func);
1388 if (Func->isVerbose(IceV_RegOrigins) && Var->hasReg()) {
1390 << Func->getTarget()->getRegName(Var->getRegNum(),
1397 // Dump list of successor nodes.
1398 if (Func->isVerbose(IceV_Succs)) {
1399 Str << " // succs = ";
1401 for (CfgNode *I : OutEdges) {
1405 Str << "%" << I->getName();
1411 void CfgNode::profileExecutionCount(VariableDeclaration *Var) {
1412 constexpr char RMW_I64[] = "llvm.nacl.atomic.rmw.i64";
1414 GlobalContext *Context = Func->getContext();
1416 bool BadIntrinsic = false;
1417 const Intrinsics::FullIntrinsicInfo *Info =
1418 Context->getIntrinsicsInfo().find(RMW_I64, BadIntrinsic);
1419 assert(!BadIntrinsic);
1420 assert(Info != nullptr);
1422 Operand *RMWI64Name = Context->getConstantExternSym(RMW_I64);
1423 constexpr RelocOffsetT Offset = 0;
1424 constexpr bool SuppressMangling = true;
1426 Context->getConstantSym(Offset, Var->getName(), SuppressMangling);
1427 Constant *AtomicRMWOp = Context->getConstantInt32(Intrinsics::AtomicAdd);
1428 Constant *One = Context->getConstantInt64(1);
1429 Constant *OrderAcquireRelease =
1430 Context->getConstantInt32(Intrinsics::MemoryOrderAcquireRelease);
1432 auto *Inst = InstIntrinsicCall::create(
1433 Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info);
1434 Inst->addArg(AtomicRMWOp);
1435 Inst->addArg(Counter);
1437 Inst->addArg(OrderAcquireRelease);
1438 Insts.push_front(Inst);
1441 } // end of namespace Ice