//===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation ----===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the CfgNode class, including the complexities of
/// instruction insertion and in-edge calculation.
///
//===----------------------------------------------------------------------===//

#include "IceCfgNode.h"

#include "IceAssembler.h"
#include "IceCfg.h"
#include "IceGlobalInits.h"
#include "IceInst.h"
#include "IceInstVarIter.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IceTargetLowering.h"

namespace Ice {
CfgNode::CfgNode(Cfg *Func, SizeT LabelNumber)
    : Func(Func), Number(LabelNumber), LabelNumber(LabelNumber) {}
// Returns the name the node was created with. If no name was given, it
// synthesizes a (hopefully) unique name.
IceString CfgNode::getName() const {
  if (NameIndex >= 0)
    return Func->getIdentifierName(NameIndex);
  return "__" + std::to_string(LabelNumber);
}
// Adds an instruction to either the Phi list or the regular instruction list.
// Validates that all Phis are added before all regular instructions.
void CfgNode::appendInst(Inst *Instr) {
  ++InstCountEstimate;
  if (auto *Phi = llvm::dyn_cast<InstPhi>(Instr)) {
    if (!Insts.empty()) {
      Func->setError("Phi instruction added to the middle of a block");
      return;
    }
    Phis.push_back(Phi);
  } else {
    Insts.push_back(Instr);
  }
}
// Renumbers the non-deleted instructions in the node. This needs to be done in
// preparation for live range analysis. The instruction numbers in a block must
// be monotonically increasing. The range of instruction numbers in a block,
// from lowest to highest, must not overlap with the range of any other block.
void CfgNode::renumberInstructions() {
  InstNumberT FirstNumber = Func->getNextInstNumber();
  for (Inst &I : Phis)
    I.renumber(Func);
  for (Inst &I : Insts)
    I.renumber(Func);
  InstCountEstimate = Func->getNextInstNumber() - FirstNumber;
}
// When a node is created, the OutEdges are immediately known, but the InEdges
// have to be built up incrementally. After the CFG has been constructed, the
// computePredecessors() pass finalizes it by creating the InEdges list.
void CfgNode::computePredecessors() {
  for (CfgNode *Succ : OutEdges)
    Succ->InEdges.push_back(this);
}
void CfgNode::computeSuccessors() {
  OutEdges = Insts.rbegin()->getTerminatorEdges();
}
// Validate each Phi instruction in the node with respect to control flow. For
// every phi argument, its label must appear in the predecessor list. For each
// predecessor, there must be a phi argument with that label. We don't check
// that phi arguments with the same label have the same value.
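//
// An illustrative example (not taken from any particular test): if a node has
// in-edges from %bb1 and %bb2, then
//   %x = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]  ; valid
//   %x = phi i32 [ %a, %bb1 ], [ %b, %bb3 ]  ; invalid: %bb3 is not a pred
//   %x = phi i32 [ %a, %bb1 ]                ; invalid: no argument for %bb2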
void CfgNode::validatePhis() {
  for (Inst &Instr : Phis) {
    auto *Phi = llvm::cast<InstPhi>(&Instr);
    // We do a simple O(N^2) algorithm to check for consistency. Even so, it
    // shows up as only about 0.2% of the total translation time. But if
    // necessary, we could improve the complexity by using a hash table to
    // count how many times each node is referenced in the Phi instruction, and
    // how many times each node is referenced in the incoming edge list, and
    // compare the two for equality.
    for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
      CfgNode *Label = Phi->getLabel(i);
      bool Found = false;
      for (CfgNode *InNode : getInEdges()) {
        if (InNode == Label) {
          Found = true;
          break;
        }
      }
      if (!Found)
        llvm::report_fatal_error("Phi error: label for bad incoming edge");
    }
    for (CfgNode *InNode : getInEdges()) {
      bool Found = false;
      for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
        CfgNode *Label = Phi->getLabel(i);
        if (InNode == Label) {
          Found = true;
          break;
        }
      }
      if (!Found)
        llvm::report_fatal_error("Phi error: missing label for incoming edge");
    }
  }
}
// This does part 1 of Phi lowering, by creating a new dest variable for each
// Phi instruction, replacing the Phi instruction's dest with that variable,
// and adding an explicit assignment of the old dest to the new dest. For
// example,
//   a=phi(...)
// changes to
//   "a_phi=phi(...); a=a_phi".
//
// This is in preparation for part 2 which deletes the Phi instructions and
// appends assignment instructions to predecessor blocks. Note that this
// transformation preserves SSA form.
void CfgNode::placePhiLoads() {
  for (Inst &I : Phis) {
    auto *Phi = llvm::dyn_cast<InstPhi>(&I);
    Insts.insert(Insts.begin(), Phi->lower(Func));
  }
}
// This does part 2 of Phi lowering. For each Phi instruction at each out-edge,
// create a corresponding assignment instruction, and add all the assignments
// near the end of this block. They need to be added before any branch
// instruction, and also if the block ends with a compare instruction followed
// by a branch instruction that we may want to fuse, it's better to insert the
// new assignments before the compare instruction. The
// tryOptimizedCmpxchgCmpBr() method assumes this ordering of instructions.
//
// Note that this transformation takes the Phi dest variables out of SSA form,
// as there may be assignments to the dest variable in multiple blocks.
void CfgNode::placePhiStores() {
  // Find the insertion point.
  InstList::iterator InsertionPoint = Insts.end();
  // Every block must end in a terminator instruction, and therefore must have
  // at least one instruction, so it's valid to decrement InsertionPoint (but
  // assert just in case).
  assert(InsertionPoint != Insts.begin());
  --InsertionPoint;
  // Confirm that InsertionPoint is a terminator instruction. Calling
  // getTerminatorEdges() on a non-terminator instruction will cause an
  // llvm_unreachable().
  (void)InsertionPoint->getTerminatorEdges();
  // SafeInsertionPoint is always immediately before the terminator
  // instruction. If the block ends in a compare and conditional branch, it's
  // better to place the Phi store before the compare so as not to interfere
  // with compare/branch fusing. However, if the compare instruction's dest
  // operand is the same as the new assignment statement's source operand, this
  // can't be done due to data dependences, so we need to fall back to the
  // SafeInsertionPoint. To illustrate:
  //   ; <label>:95
  //   %97 = load i8* %96, align 1
  //   %98 = icmp ne i8 %97, 0
  //   br i1 %98, label %99, label %2132
  //   ; <label>:99
  //   %100 = phi i8 [ %97, %95 ], [ %110, %108 ]
  //   %101 = phi i1 [ %98, %95 ], [ %111, %108 ]
  // would be Phi-lowered as:
  //   ; <label>:95
  //   %97 = load i8* %96, align 1
  //   %100_phi = %97 ; can be at InsertionPoint
  //   %98 = icmp ne i8 %97, 0
  //   %101_phi = %98 ; must be at SafeInsertionPoint
  //   br i1 %98, label %99, label %2132
  //
  // TODO(stichnot): It may be possible to bypass this whole SafeInsertionPoint
  // mechanism. If a source basic block ends in a conditional branch:
  //   labelSource:
  //   br i1 %foo, label %labelTrue, label %labelFalse
  // and a branch target has a Phi involving the branch operand:
  //   labelTrue:
  //   %bar = phi i1 [ %foo, %labelSource ], ...
  // then we actually know the constant i1 value of the Phi operand:
  //   %bar = phi i1 [ true, %labelSource ], ...
  // It seems that this optimization should be done by clang or opt, but we
  // could also do it here.
  InstList::iterator SafeInsertionPoint = InsertionPoint;
  // Keep track of the dest variable of a compare instruction, so that we
  // insert the new instruction at the SafeInsertionPoint if the compare's dest
  // matches the Phi-lowered assignment's source.
  Variable *CmpInstDest = nullptr;
  // If the current insertion point is at a conditional branch instruction, and
  // the previous instruction is a compare instruction, then we move the
  // insertion point before the compare instruction so as not to interfere with
  // compare/branch fusing.
  if (auto *Branch = llvm::dyn_cast<InstBr>(InsertionPoint)) {
    if (!Branch->isUnconditional()) {
      if (InsertionPoint != Insts.begin()) {
        --InsertionPoint;
        if (llvm::isa<InstIcmp>(InsertionPoint) ||
            llvm::isa<InstFcmp>(InsertionPoint)) {
          CmpInstDest = InsertionPoint->getDest();
        } else {
          ++InsertionPoint;
        }
      }
    }
  }

  // Consider every out-edge.
  for (CfgNode *Succ : OutEdges) {
    // Consider every Phi instruction at the out-edge.
    for (Inst &I : Succ->Phis) {
      auto *Phi = llvm::dyn_cast<InstPhi>(&I);
      Operand *Operand = Phi->getOperandForTarget(this);
      assert(Operand);
      Variable *Dest = I.getDest();
      assert(Dest);
      InstAssign *NewInst = InstAssign::create(Func, Dest, Operand);
      if (CmpInstDest == Operand)
        Insts.insert(SafeInsertionPoint, NewInst);
      else
        Insts.insert(InsertionPoint, NewInst);
    }
  }
}
// Deletes the phi instructions after the loads and stores are placed.
void CfgNode::deletePhis() {
  for (Inst &I : Phis)
    I.setDeleted();
}
// Splits the edge from Pred to this node by creating a new node and hooking up
// the in and out edges appropriately. (The EdgeIndex parameter is only used to
// make the new node's name unique when there are multiple edges between the
// same pair of nodes.) The new node's instruction list is initialized to the
// empty list, with no terminator instruction. There must not be multiple edges
// from Pred to this node, so all Inst::getTerminatorEdges implementations must
// not contain duplicates.
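//
// For example (illustrative), splitting the edge Pred->this turns
//   Pred -> this
// into
//   Pred -> NewNode -> this
// where NewNode will later receive the Phi-lowered assignments and an
// unconditional branch back to this node.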
CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) {
  CfgNode *NewNode = Func->makeNode();
  // Depth is the minimum as it works if both are the same, but if one is
  // outside the loop and the other is inside, the new node should be placed
  // outside and not be executed multiple times within the loop.
  NewNode->setLoopNestDepth(
      std::min(getLoopNestDepth(), Pred->getLoopNestDepth()));
  if (BuildDefs::dump())
    NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" +
                     std::to_string(EdgeIndex));
  // The new node is added to the end of the node list, and will later need to
  // be sorted into a reasonable topological order.
  NewNode->setNeedsPlacement(true);
  // Repoint Pred's out-edge.
  bool Found = false;
  for (CfgNode *&I : Pred->OutEdges) {
    if (I == this) {
      I = NewNode;
      NewNode->InEdges.push_back(Pred);
      Found = true;
      break;
    }
  }
  assert(Found);
  // Repoint this node's in-edge.
  Found = false;
  for (CfgNode *&I : InEdges) {
    if (I == Pred) {
      I = NewNode;
      NewNode->OutEdges.push_back(this);
      Found = true;
      break;
    }
  }
  assert(Found);
  // Repoint all suitable branch instructions' target and return.
  Found = false;
  for (Inst &I : Pred->getInsts())
    if (!I.isDeleted() && I.repointEdges(this, NewNode))
      Found = true;
  assert(Found);
  return NewNode;
}
namespace {

// Helper function used by advancedPhiLowering().
bool sameVarOrReg(TargetLowering *Target, const Variable *Var1,
                  const Operand *Opnd) {
  if (Var1 == Opnd)
    return true;
  const auto *Var2 = llvm::dyn_cast<Variable>(Opnd);
  if (Var2 == nullptr)
    return false;

  // If either operand lacks a register, they cannot be the same.
  if (!Var1->hasReg() || !Var2->hasReg())
    return false;

  int32_t RegNum1 = Var1->getRegNum();
  int32_t RegNum2 = Var2->getRegNum();
  // Quick common-case check.
  if (RegNum1 == RegNum2)
    return true;

  assert(Target->getAliasesForRegister(RegNum1)[RegNum2] ==
         Target->getAliasesForRegister(RegNum2)[RegNum1]);
  return Target->getAliasesForRegister(RegNum1)[RegNum2];
}
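
// A sketch of the aliasing case, assuming an x86-32 target: a variable
// assigned to eax and another assigned to ax occupy overlapping storage, so
// the getAliasesForRegister(eax) bitvector has the bit for ax set, and
// sameVarOrReg() reports the two as "the same" even though the Variables
// (and register numbers) differ.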
} // end of anonymous namespace
// This is the "advanced" version of Phi lowering for a basic block, in
// contrast to the simple version that lowers through assignments involving
// temporaries.
//
// All Phi instructions in a basic block are conceptually executed in parallel.
// However, if we lower Phis early and commit to a sequential ordering, we may
// end up creating unnecessary interferences which lead to worse register
// allocation. Delaying Phi scheduling until after register allocation can help
// unless there are no free registers for shuffling registers or stack slots
// and spilling becomes necessary.
//
// The advanced Phi lowering starts by finding a topological sort of the Phi
// instructions, where "A=B" comes before "B=C" due to the anti-dependence on
// B. Preexisting register assignments are considered in the topological sort.
// If a topological sort is not possible due to a cycle, the cycle is broken by
// introducing a non-parallel temporary. For example, a cycle arising from a
// permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being
// equal, prefer to schedule assignments with register-allocated Src operands
// earlier, in case that register becomes free afterwards, and prefer to
// schedule assignments with register-allocated Dest variables later, to keep
// that register free for longer.
//
// Once the ordering is determined, the Cfg edge is split and the assignment
// list is lowered by the target lowering layer. Since the assignment lowering
// may create new infinite-weight temporaries, a follow-on register allocation
// pass will be needed. To prepare for this, liveness (including live range
// calculation) of the split nodes needs to be calculated, and liveness of the
// original node needs to be updated to "undo" the effects of the phi
// assignments.
//
// The specific placement of the new node within the Cfg node list is deferred
// until later, including after empty node contraction.
//
// After phi assignments are lowered across all blocks, another register
// allocation pass is run, focusing only on pre-colored and infinite-weight
// variables, similar to Om1 register allocation (except without the need to
// specially compute these variables' live ranges, since they have already been
// precisely calculated). The register allocator in this mode needs the ability
// to forcibly spill and reload registers in case none are naturally available.
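//
// A small worked example of the NumPred bookkeeping below (hypothetical
// moves, not from any particular test). Suppose one in-edge requires the
// parallel moves "A=B" and "B=C":
//   - "A=B" (Dest=A): no other move reads A, so NumPred=0 and its Weight
//     gets the WeightNoPreds credit; it is scheduled first.
//   - "B=C" (Dest=B): "A=B" still reads B, so NumPred=1 and it must wait.
// After "A=B" is emitted, B is no longer needed as a source: NumPred for
// "B=C" drops to 0, its Weight rises by WeightNoPreds, and it is emitted
// next, giving the sequential order "A=B; B=C", which preserves the
// parallel-move semantics.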
void CfgNode::advancedPhiLowering() {
  if (getPhis().empty())
    return;

  // Count the number of non-deleted Phi instructions.
  struct PhiDesc {
    InstPhi *Phi;
    Variable *Dest;
    Operand *Src;
    bool Processed;
    size_t NumPred; // number of entries whose Src is this Dest
    int32_t Weight; // preference for topological order
  };
  llvm::SmallVector<PhiDesc, 32> Desc(getPhis().size());

  size_t NumPhis = 0;
  for (Inst &I : Phis) {
    auto *Instr = llvm::dyn_cast<InstPhi>(&I);
    if (!Instr->isDeleted()) {
      Variable *Dest = Instr->getDest();
      Desc[NumPhis].Phi = Instr;
      Desc[NumPhis].Dest = Dest;
      ++NumPhis;
      // Undo the effect of the phi instruction on this node's live-in set by
      // marking the phi dest variable as live on entry.
      SizeT VarNum = Func->getLiveness()->getLiveIndex(Dest->getIndex());
      assert(!Func->getLiveness()->getLiveIn(this)[VarNum]);
      Func->getLiveness()->getLiveIn(this)[VarNum] = true;
      Instr->setDeleted();
    }
  }
  if (NumPhis == 0)
    return;

  TargetLowering *Target = Func->getTarget();
  SizeT InEdgeIndex = 0;
  for (CfgNode *Pred : InEdges) {
    CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++);
    SizeT Remaining = NumPhis;

    // First pass computes Src and initializes NumPred.
    for (size_t I = 0; I < NumPhis; ++I) {
      Variable *Dest = Desc[I].Dest;
      Operand *Src = Desc[I].Phi->getOperandForTarget(Pred);
      Desc[I].Src = Src;
      Desc[I].Processed = false;
      Desc[I].NumPred = 0;
      // Cherry-pick any trivial assignments, so that they don't contribute to
      // the running complexity of the topological sort.
      if (sameVarOrReg(Target, Dest, Src)) {
        Desc[I].Processed = true;
        --Remaining;
        if (Dest != Src) {
          // If Dest and Src are syntactically the same, don't bother adding
          // the assignment, because in all respects it would be redundant, and
          // if Dest/Src are on the stack, the target lowering may naively
          // decide to lower it using a temporary register.
          Split->appendInst(InstAssign::create(Func, Dest, Src));
        }
      }
    }
    // Second pass computes NumPred by comparing every pair of Phi
    // instructions.
    for (size_t I = 0; I < NumPhis; ++I) {
      if (Desc[I].Processed)
        continue;
      const Variable *Dest = Desc[I].Dest;
      for (size_t J = 0; J < NumPhis; ++J) {
        if (Desc[J].Processed)
          continue;
        if (I != J) {
          // There shouldn't be two Phis with the same Dest variable or
          // register.
          assert(!sameVarOrReg(Target, Dest, Desc[J].Dest));
        }
        const Operand *Src = Desc[J].Src;
        if (sameVarOrReg(Target, Dest, Src))
          ++Desc[I].NumPred;
      }
    }

    // Another pass to compute initial Weight values.

    // Always pick NumPred=0 over NumPred>0.
    constexpr int32_t WeightNoPreds = 4;
    // Prefer Src as a register because the register might free up.
    constexpr int32_t WeightSrcIsReg = 2;
    // Prefer Dest not as a register because the register stays free longer.
    constexpr int32_t WeightDestNotReg = 1;

    for (size_t I = 0; I < NumPhis; ++I) {
      if (Desc[I].Processed)
        continue;
      int32_t Weight = 0;
      if (Desc[I].NumPred == 0)
        Weight += WeightNoPreds;
      if (auto *Var = llvm::dyn_cast<Variable>(Desc[I].Src))
        if (Var->hasReg())
          Weight += WeightSrcIsReg;
      if (!Desc[I].Dest->hasReg())
        Weight += WeightDestNotReg;
      Desc[I].Weight = Weight;
    }

    // Repeatedly choose and process the best candidate in the topological
    // sort, until no candidates remain. This implementation is O(N^2) where N
    // is the number of Phi instructions, but with a small constant factor
    // compared to a likely implementation of O(N) topological sort.
    for (; Remaining; --Remaining) {
      size_t BestIndex = 0;
      int32_t BestWeight = -1;
      // Find the best candidate.
      for (size_t I = 0; I < NumPhis; ++I) {
        if (Desc[I].Processed)
          continue;
        const int32_t Weight = Desc[I].Weight;
        if (Weight > BestWeight) {
          BestIndex = I;
          BestWeight = Weight;
        }
      }
      assert(BestWeight >= 0);
      assert(Desc[BestIndex].NumPred <= 1);
      Variable *Dest = Desc[BestIndex].Dest;
      Operand *Src = Desc[BestIndex].Src;
      assert(!sameVarOrReg(Target, Dest, Src));
      // Break a cycle by introducing a temporary.
      if (Desc[BestIndex].NumPred) {
        bool Found = false;
        // If the target instruction "A=B" is part of a cycle, find the "X=A"
        // assignment in the cycle because it will have to be rewritten as
        // "X=tmp".
        for (size_t J = 0; !Found && J < NumPhis; ++J) {
          if (Desc[J].Processed)
            continue;
          Operand *OtherSrc = Desc[J].Src;
          if (Desc[J].NumPred && sameVarOrReg(Target, Dest, OtherSrc)) {
            SizeT VarNum = Func->getNumVariables();
            Variable *Tmp = Func->makeVariable(OtherSrc->getType());
            if (BuildDefs::dump())
              Tmp->setName(Func, "__split_" + std::to_string(VarNum));
            Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc));
            Desc[J].Src = Tmp;
            Found = true;
          }
        }
        assert(Found);
      }
      // Now that a cycle (if any) has been broken, create the actual
      // assignment.
      Split->appendInst(InstAssign::create(Func, Dest, Src));
      // Update NumPred for all Phi assignments using this Phi's Src as their
      // Dest variable. Also update Weight if NumPred dropped from 1 to 0.
      if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
        for (size_t I = 0; I < NumPhis; ++I) {
          if (Desc[I].Processed)
            continue;
          if (sameVarOrReg(Target, Var, Desc[I].Dest)) {
            if (--Desc[I].NumPred == 0)
              Desc[I].Weight += WeightNoPreds;
          }
        }
      }
      Desc[BestIndex].Processed = true;
    }
    Split->appendInst(InstBr::create(Func, this));

    Split->genCode();

    Func->getVMetadata()->addNode(Split);
  }
}
// Does address mode optimization. Pass each instruction to the TargetLowering
// object. If it returns a new instruction (representing the optimized address
// mode), then insert the new instruction and delete the old.
void CfgNode::doAddressOpt() {
  TargetLowering *Target = Func->getTarget();
  LoweringContext &Context = Target->getContext();
  Context.init(this);
  while (!Context.atEnd()) {
    Target->doAddressOpt();
  }
}
void CfgNode::doNopInsertion(RandomNumberGenerator &RNG) {
  TargetLowering *Target = Func->getTarget();
  LoweringContext &Context = Target->getContext();
  Context.init(this);
  Context.setInsertPoint(Context.getCur());
  // Do not insert nop in bundle locked instructions.
  bool PauseNopInsertion = false;
  while (!Context.atEnd()) {
    if (llvm::isa<InstBundleLock>(Context.getCur())) {
      PauseNopInsertion = true;
    } else if (llvm::isa<InstBundleUnlock>(Context.getCur())) {
      PauseNopInsertion = false;
    }
    if (!PauseNopInsertion)
      Target->doNopInsertion(RNG);
    // Ensure Cur=Next, so that the nops are inserted before the current
    // instruction rather than after.
    Context.advanceCur();
    Context.advanceNext();
  }
}
// Drives the target lowering. Passes the current instruction and the next
// non-deleted instruction for target lowering.
void CfgNode::genCode() {
  TargetLowering *Target = Func->getTarget();
  LoweringContext &Context = Target->getContext();
  // Lower the regular instructions.
  Context.init(this);
  Target->initNodeForLowering(this);
  while (!Context.atEnd()) {
    InstList::iterator Orig = Context.getCur();
    if (llvm::isa<InstRet>(*Orig))
      setHasReturn();
    Target->lower();
    // Ensure target lowering actually moved the cursor.
    assert(Context.getCur() != Orig);
  }
  Context.availabilityReset();
  // Do preliminary lowering of the Phi instructions.
  Target->prelowerPhis();
}
void CfgNode::livenessLightweight() {
  SizeT NumVars = Func->getNumVariables();
  LivenessBV Live(NumVars);
  // Process regular instructions in reverse order.
  for (Inst &I : reverse_range(Insts)) {
    if (I.isDeleted())
      continue;
    I.livenessLightweight(Func, Live);
  }
  for (Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    I.livenessLightweight(Func, Live);
  }
}
// Performs liveness analysis on the block. Returns true if the incoming
// liveness changed from before, false if it stayed the same. (If it changes,
// the node's predecessors need to be processed again.)
bool CfgNode::liveness(Liveness *Liveness) {
  SizeT NumVars = Liveness->getNumVarsInNode(this);
  LivenessBV Live(NumVars);
  LiveBeginEndMap *LiveBegin = nullptr;
  LiveBeginEndMap *LiveEnd = nullptr;
  // Mark the beginning and ending of each variable's live range with the
  // sentinel instruction number 0.
  if (Liveness->getMode() == Liveness_Intervals) {
    LiveBegin = Liveness->getLiveBegin(this);
    LiveEnd = Liveness->getLiveEnd(this);
    LiveBegin->clear();
    LiveEnd->clear();
    // Guess that the number of live ranges beginning is roughly the number of
    // instructions, and same for live ranges ending.
    LiveBegin->reserve(getInstCountEstimate());
    LiveEnd->reserve(getInstCountEstimate());
  }
  // Initialize Live to be the union of all successors' LiveIn.
  for (CfgNode *Succ : OutEdges) {
    Live |= Liveness->getLiveIn(Succ);
    // Mark corresponding argument of phis in successor as live.
    for (Inst &I : Succ->Phis) {
      if (I.isDeleted())
        continue;
      auto *Phi = llvm::dyn_cast<InstPhi>(&I);
      Phi->livenessPhiOperand(Live, this, Liveness);
    }
  }
  Liveness->getLiveOut(this) = Live;

  // Process regular instructions in reverse order.
  for (Inst &I : reverse_range(Insts)) {
    if (I.isDeleted())
      continue;
    I.liveness(I.getNumber(), Live, Liveness, LiveBegin, LiveEnd);
  }
  // Process phis in forward order so that we can override the instruction
  // number to be that of the earliest phi instruction in the block.
  SizeT NumNonDeadPhis = 0;
  InstNumberT FirstPhiNumber = Inst::NumberSentinel;
  for (Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    if (FirstPhiNumber == Inst::NumberSentinel)
      FirstPhiNumber = I.getNumber();
    if (I.liveness(FirstPhiNumber, Live, Liveness, LiveBegin, LiveEnd))
      ++NumNonDeadPhis;
  }

  // When using the sparse representation, after traversing the instructions in
  // the block, the Live bitvector should only contain set bits for global
  // variables upon block entry. We validate this by shrinking the Live vector
  // and then testing it against the pre-shrunk version. (The shrinking is
  // required, but the validation is not.)
  LivenessBV LiveOrig = Live;
  Live.resize(Liveness->getNumGlobalVars());
  if (Live != LiveOrig) {
    if (BuildDefs::dump()) {
      // This is a fatal liveness consistency error. Print some diagnostics and
      // abort.
      Ostream &Str = Func->getContext()->getStrDump();
      Func->resetCurrentNode();
      Str << "LiveOrig-Live =";
      for (SizeT i = Live.size(); i < LiveOrig.size(); ++i) {
        if (LiveOrig.test(i)) {
          Str << " ";
          Liveness->getVariable(i, this)->dump(Func);
        }
      }
      Str << "\n";
    }
    llvm::report_fatal_error("Fatal inconsistency in liveness analysis");
  }

  bool Changed = false;
  LivenessBV &LiveIn = Liveness->getLiveIn(this);
  // Add in current LiveIn
  Live |= LiveIn;
  // Check result, set LiveIn=Live
  SizeT &PrevNumNonDeadPhis = Liveness->getNumNonDeadPhis(this);
  bool LiveInChanged = (Live != LiveIn);
  Changed = (NumNonDeadPhis != PrevNumNonDeadPhis || LiveInChanged);
  if (LiveInChanged)
    LiveIn = Live;
  PrevNumNonDeadPhis = NumNonDeadPhis;
  return Changed;
}
// Validate the integrity of the live ranges in this block. If there are any
// errors, it prints details and returns false. On success, it returns true.
bool CfgNode::livenessValidateIntervals(Liveness *Liveness) const {
  if (!BuildDefs::asserts())
    return true;

  // Verify there are no duplicates.
  auto ComparePair =
      [](const LiveBeginEndMapEntry &A, const LiveBeginEndMapEntry &B) {
        return A.first == B.first;
      };
  LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this);
  LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this);
  if (std::adjacent_find(MapBegin.begin(), MapBegin.end(), ComparePair) ==
          MapBegin.end() &&
      std::adjacent_find(MapEnd.begin(), MapEnd.end(), ComparePair) ==
          MapEnd.end())
    return true;

  // There is definitely a liveness error. All paths from here return false.
  if (!BuildDefs::dump())
    return false;

  // Print all the errors.
  if (BuildDefs::dump()) {
    GlobalContext *Ctx = Func->getContext();
    OstreamLocker L(Ctx);
    Ostream &Str = Ctx->getStrDump();
    if (Func->isVerbose()) {
      Str << "Live range errors in the following block:\n";
      dump(Func);
    }
    for (auto Start = MapBegin.begin();
         (Start = std::adjacent_find(Start, MapBegin.end(), ComparePair)) !=
         MapBegin.end();
         ++Start) {
      auto Next = Start + 1;
      Str << "Duplicate LR begin, block " << getName() << ", instructions "
          << Start->second << " & " << Next->second << ", variable "
          << Liveness->getVariable(Start->first, this)->getName(Func) << "\n";
    }
    for (auto Start = MapEnd.begin();
         (Start = std::adjacent_find(Start, MapEnd.end(), ComparePair)) !=
         MapEnd.end();
         ++Start) {
      auto Next = Start + 1;
      Str << "Duplicate LR end, block " << getName() << ", instructions "
          << Start->second << " & " << Next->second << ", variable "
          << Liveness->getVariable(Start->first, this)->getName(Func) << "\n";
    }
  }

  return false;
}
// Once basic liveness is complete, compute actual live ranges. It is assumed
// that within a single basic block, a live range begins at most once and ends
// at most once. This is certainly true for pure SSA form. It is also true once
// phis are lowered, since each assignment to the phi-based temporary is in a
// different basic block, and there is a single read that ends the live range
// in the basic block that contained the actual phi instruction.
void CfgNode::livenessAddIntervals(Liveness *Liveness, InstNumberT FirstInstNum,
                                   InstNumberT LastInstNum) {
  TimerMarker T1(TimerStack::TT_liveRange, Func);

  SizeT NumVars = Liveness->getNumVarsInNode(this);
  LivenessBV &LiveIn = Liveness->getLiveIn(this);
  LivenessBV &LiveOut = Liveness->getLiveOut(this);
  LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this);
  LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this);
  std::sort(MapBegin.begin(), MapBegin.end());
  std::sort(MapEnd.begin(), MapEnd.end());

  if (!livenessValidateIntervals(Liveness)) {
    llvm::report_fatal_error("livenessAddIntervals: Liveness error");
  }

  LivenessBV LiveInAndOut = LiveIn;
  LiveInAndOut &= LiveOut;

  // Iterate in parallel across the sorted MapBegin[] and MapEnd[].
  auto IBB = MapBegin.begin(), IEB = MapEnd.begin();
  auto IBE = MapBegin.end(), IEE = MapEnd.end();
  while (IBB != IBE || IEB != IEE) {
    SizeT i1 = IBB == IBE ? NumVars : IBB->first;
    SizeT i2 = IEB == IEE ? NumVars : IEB->first;
    SizeT i = std::min(i1, i2);
    // i1 is the Variable number of the next MapBegin entry, and i2 is the
    // Variable number of the next MapEnd entry. If i1==i2, then the Variable's
    // live range begins and ends in this block. If i1<i2, then i1's live range
    // begins at instruction IBB->second and extends through the end of the
    // block. If i1>i2, then i2's live range begins at the first instruction of
    // the block and ends at IEB->second. In any case, we choose the lesser of
    // i1 and i2 and proceed accordingly.
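    //
    // A small worked example (hypothetical numbers): with FirstInstNum=10,
    // LastInstNum=20, a MapBegin entry (5, 12), and a MapEnd entry (5, 16),
    // we get i1==i2==5, LB=12, LE=16, and variable 5 receives the live range
    // [12, 16). If instead only the MapEnd entry (5, 16) existed, variable 5
    // would be live-in and receive [10, 16).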
    InstNumberT LB = i == i1 ? IBB->second : FirstInstNum;
    InstNumberT LE = i == i2 ? IEB->second : LastInstNum + 1;

    Variable *Var = Liveness->getVariable(i, this);
    if (LB > LE) {
      Var->addLiveRange(FirstInstNum, LE);
      Var->addLiveRange(LB, LastInstNum + 1);
      // Assert that Var is a global variable by checking that its liveness
      // index is less than the number of globals. This ensures that the
      // LiveInAndOut[] access is valid.
      assert(i < Liveness->getNumGlobalVars());
      LiveInAndOut[i] = false;
    } else {
      Var->addLiveRange(LB, LE);
    }
    if (i == i1)
      ++IBB;
    if (i == i2)
      ++IEB;
  }
  // Process the variables that are live across the entire block.
  for (int i = LiveInAndOut.find_first(); i != -1;
       i = LiveInAndOut.find_next(i)) {
    Variable *Var = Liveness->getVariable(i, this);
    if (Liveness->getRangeMask(Var->getIndex()))
      Var->addLiveRange(FirstInstNum, LastInstNum + 1);
  }
}
// If this node contains only deleted instructions, and ends in an
// unconditional branch, contract the node by repointing all its in-edges to
// its successor.
void CfgNode::contractIfEmpty() {
  if (InEdges.empty())
    return;
  Inst *Branch = nullptr;
  for (Inst &I : Insts) {
    if (I.isDeleted())
      continue;
    if (I.isUnconditionalBranch())
      Branch = &I;
    else if (!I.isRedundantAssign())
      return;
  }
  assert(OutEdges.size() == 1);
  // Don't try to delete a self-loop.
  if (OutEdges[0] == this)
    return;

  Branch->setDeleted();
  CfgNode *Successor = OutEdges.front();
  // Repoint all this node's in-edges to this node's successor, unless this
  // node's successor is actually itself (in which case the statement
  // "OutEdges.front()->InEdges.push_back(Pred)" could invalidate the iterator
  // over this->InEdges).
  if (Successor != this) {
    for (CfgNode *Pred : InEdges) {
      for (CfgNode *&I : Pred->OutEdges) {
        if (I == this) {
          I = Successor;
        }
      }
      Successor->InEdges.push_back(Pred);
      for (Inst &I : Pred->getInsts()) {
        if (!I.isDeleted())
          I.repointEdges(this, Successor);
      }
    }

    // Remove the in-edge to the successor to allow node reordering to make
    // better decisions. For example it's more helpful to place a node after a
    // reachable predecessor than an unreachable one (like the one we just
    // contracted).
    Successor->InEdges.erase(
        std::find(Successor->InEdges.begin(), Successor->InEdges.end(), this));
  }
  InEdges.clear();
}
void CfgNode::doBranchOpt(const CfgNode *NextNode) {
  TargetLowering *Target = Func->getTarget();
  // Find the first opportunity for branch optimization (which will be the last
  // instruction in the block) and stop. This is sufficient unless there is
  // some target lowering where we have the possibility of multiple
  // optimizations per block. Take care with switch lowering as there are
  // multiple unconditional branches and only the last can be deleted.
  for (Inst &I : reverse_range(Insts)) {
    if (!I.isDeleted()) {
      Target->doBranchOpt(&I, NextNode);
      return;
    }
  }
}
// ======================== Dump routines ======================== //

namespace {

// Helper functions for emit().

void emitRegisterUsage(Ostream &Str, const Cfg *Func, const CfgNode *Node,
                       bool IsLiveIn, CfgVector<SizeT> &LiveRegCount) {
  if (!BuildDefs::dump())
    return;
  Liveness *Liveness = Func->getLiveness();
  const LivenessBV *Live;
  const int32_t StackReg = Func->getTarget()->getStackReg();
  const int32_t FrameOrStackReg = Func->getTarget()->getFrameOrStackReg();
  if (IsLiveIn) {
    Live = &Liveness->getLiveIn(Node);
    Str << "\t\t\t\t# LiveIn=";
  } else {
    Live = &Liveness->getLiveOut(Node);
    Str << "\t\t\t\t# LiveOut=";
  }
  if (!Live->empty()) {
    CfgVector<Variable *> LiveRegs;
    for (SizeT i = 0; i < Live->size(); ++i) {
      if (!(*Live)[i])
        continue;
      Variable *Var = Liveness->getVariable(i, Node);
      if (!Var->hasReg())
        continue;
      const int32_t RegNum = Var->getRegNum();
      if (RegNum == StackReg || RegNum == FrameOrStackReg)
        continue;
      ++LiveRegCount[RegNum];
      LiveRegs.push_back(Var);
    }
    // Sort the variables by regnum so they are always printed in a familiar
    // order.
    std::sort(LiveRegs.begin(), LiveRegs.end(),
              [](const Variable *V1, const Variable *V2) {
                return V1->getRegNum() < V2->getRegNum();
              });
    bool First = true;
    for (Variable *Var : LiveRegs) {
      if (!First)
        Str << ",";
      First = false;
      Var->emit(Func);
    }
  }
  Str << "\n";
}
void emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr,
                         CfgVector<SizeT> &LiveRegCount) {
  if (!BuildDefs::dump())
    return;
  bool First = true;
  Variable *Dest = Instr->getDest();
  // Normally we increment the live count for the dest register. But we
  // shouldn't if the instruction's IsDestRedefined flag is set, because this
  // means that the target lowering created this instruction as a non-SSA
  // assignment; i.e., a different, previous instruction started the dest
  // variable's live range.
  if (!Instr->isDestRedefined() && Dest && Dest->hasReg())
    ++LiveRegCount[Dest->getRegNum()];
  FOREACH_VAR_IN_INST(Var, *Instr) {
    bool ShouldReport = Instr->isLastUse(Var);
    if (ShouldReport && Var->hasReg()) {
      // Don't report end of live range until the live count reaches 0.
      SizeT NewCount = --LiveRegCount[Var->getRegNum()];
      if (NewCount)
        ShouldReport = false;
    }
    if (ShouldReport) {
      if (First)
        Str << " \t# END=";
      else
        Str << ",";
      Var->emit(Func);
      First = false;
    }
  }
}
void updateStats(Cfg *Func, const Inst *I) {
  if (!BuildDefs::dump())
    return;
  // Update emitted instruction count, plus fill/spill count for Variable
  // operands without a physical register.
  if (uint32_t Count = I->getEmitInstCount()) {
    Func->getContext()->statsUpdateEmitted(Count);
    if (Variable *Dest = I->getDest()) {
      if (!Dest->hasReg())
        Func->getContext()->statsUpdateFills();
    }
    for (SizeT S = 0; S < I->getSrcSize(); ++S) {
      if (Variable *Src = llvm::dyn_cast<Variable>(I->getSrc(S))) {
        if (!Src->hasReg())
          Func->getContext()->statsUpdateSpills();
      }
    }
  }
}

} // end of anonymous namespace
void CfgNode::emit(Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Func->setCurrentNode(this);
  Ostream &Str = Func->getContext()->getStrEmit();
  Liveness *Liveness = Func->getLiveness();
  const bool DecorateAsm =
      Liveness && Func->getContext()->getFlags().getDecorateAsm();
  Str << getAsmName() << ":\n";
  // LiveRegCount keeps track of the number of currently live variables that
  // each register is assigned to. Normally that would be only 0 or 1, but the
  // register allocator's AllowOverlap inference allows it to be greater than 1
  // for short periods.
  CfgVector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters());
  if (DecorateAsm) {
    constexpr bool IsLiveIn = true;
    emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
    if (getInEdges().size()) {
      Str << "\t\t\t\t# preds=";
      bool First = true;
      for (CfgNode *I : getInEdges()) {
        if (!First)
          Str << ",";
        First = false;
        Str << "$" << I->getName();
      }
      Str << "\n";
    }
    if (getLoopNestDepth()) {
      Str << "\t\t\t\t# loop depth=" << getLoopNestDepth() << "\n";
    }
  }

  for (const Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    // Emitting a Phi instruction should cause an error.
    I.emit(Func);
  }
  for (const Inst &I : Insts) {
    if (I.isDeleted())
      continue;
    if (I.isRedundantAssign()) {
      // Usually, redundant assignments end the live range of the src variable
      // and begin the live range of the dest variable, with no net effect on
      // the liveness of their register. However, if the register allocator
      // infers the AllowOverlap condition, then this may be a redundant
      // assignment that does not end the src variable's live range, in which
      // case the active variable count for that register needs to be bumped.
      // That normally would have happened as part of emitLiveRangesEnded(),
      // but that isn't called for redundant assignments.
      Variable *Dest = I.getDest();
      if (DecorateAsm && Dest->hasReg()) {
        ++LiveRegCount[Dest->getRegNum()];
        if (I.isLastUse(I.getSrc(0)))
          --LiveRegCount[llvm::cast<Variable>(I.getSrc(0))->getRegNum()];
      }
      continue;
    }
    I.emit(Func);
    if (DecorateAsm)
      emitLiveRangesEnded(Str, Func, &I, LiveRegCount);
    Str << "\n";
    updateStats(Func, &I);
  }
  if (DecorateAsm) {
    constexpr bool IsLiveIn = false;
    emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
  }
}
namespace {

// Helper class for emitIAS().
class BundleEmitHelper {
  BundleEmitHelper() = delete;
  BundleEmitHelper(const BundleEmitHelper &) = delete;
  BundleEmitHelper &operator=(const BundleEmitHelper &) = delete;

public:
  BundleEmitHelper(Assembler *Asm, TargetLowering *Target,
                   const InstList &Insts)
      : Asm(Asm), Target(Target), End(Insts.end()), BundleLockStart(End),
        BundleSize(1 << Asm->getBundleAlignLog2Bytes()),
        BundleMaskLo(BundleSize - 1), BundleMaskHi(~BundleMaskLo) {}
  // Check whether we're currently within a bundle_lock region.
  bool isInBundleLockRegion() const { return BundleLockStart != End; }
  // Check whether the current bundle_lock region has the align_to_end option.
  bool isAlignToEnd() const {
    assert(isInBundleLockRegion());
    return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() ==
           InstBundleLock::Opt_AlignToEnd;
  }
  // Check whether the entire bundle_lock region falls within the same bundle.
  bool isSameBundle() const {
    assert(isInBundleLockRegion());
    return SizeSnapshotPre == SizeSnapshotPost ||
           (SizeSnapshotPre & BundleMaskHi) ==
               ((SizeSnapshotPost - 1) & BundleMaskHi);
  }
  // Get the bundle alignment of the first instruction of the bundle_lock
  // region.
  intptr_t getPreAlignment() const {
    assert(isInBundleLockRegion());
    return SizeSnapshotPre & BundleMaskLo;
  }
  // Get the bundle alignment of the first instruction past the bundle_lock
  // region.
  intptr_t getPostAlignment() const {
    assert(isInBundleLockRegion());
    return SizeSnapshotPost & BundleMaskLo;
  }
  // Get the iterator pointing to the bundle_lock instruction, e.g. to roll
  // back the instruction iteration to that point.
  InstList::const_iterator getBundleLockStart() const {
    assert(isInBundleLockRegion());
    return BundleLockStart;
  }
  // Set up bookkeeping when the bundle_lock instruction is first processed.
  void enterBundleLock(InstList::const_iterator I) {
    assert(!isInBundleLockRegion());
    BundleLockStart = I;
    SizeSnapshotPre = Asm->getBufferSize();
    Asm->setPreliminary(true);
    Target->snapshotEmitState();
    assert(isInBundleLockRegion());
  }
  // Update bookkeeping when the bundle_unlock instruction is processed.
  void enterBundleUnlock() {
    assert(isInBundleLockRegion());
    SizeSnapshotPost = Asm->getBufferSize();
  }
  // Update bookkeeping when we are completely finished with the bundle_lock
  // region.
  void leaveBundleLockRegion() { BundleLockStart = End; }
  // Check whether the instruction sequence fits within the current bundle, and
  // if not, add nop padding to the end of the current bundle.
  void padToNextBundle() {
    assert(isInBundleLockRegion());
    if (!isSameBundle()) {
      intptr_t PadToNextBundle = BundleSize - getPreAlignment();
      Asm->padWithNop(PadToNextBundle);
      SizeSnapshotPre += PadToNextBundle;
      SizeSnapshotPost += PadToNextBundle;
      assert((Asm->getBufferSize() & BundleMaskLo) == 0);
      assert(Asm->getBufferSize() == SizeSnapshotPre);
    }
  }
  // If align_to_end is specified, add padding such that the instruction
  // sequence ends precisely at a bundle boundary.
  void padForAlignToEnd() {
    assert(isInBundleLockRegion());
    if (isAlignToEnd()) {
      if (intptr_t Offset = getPostAlignment()) {
        Asm->padWithNop(BundleSize - Offset);
        SizeSnapshotPre = Asm->getBufferSize();
      }
    }
  }
  // Update bookkeeping when rolling back for the second pass.
  void rollback() {
    assert(isInBundleLockRegion());
    Asm->setBufferSize(SizeSnapshotPre);
    Asm->setPreliminary(false);
    Target->rollbackEmitState();
  }

private:
  Assembler *const Asm;
  TargetLowering *const Target;
  // End is a sentinel value such that BundleLockStart==End implies that we are
  // not in a bundle_lock region.
  const InstList::const_iterator End;
  InstList::const_iterator BundleLockStart;
  const intptr_t BundleSize;
  // Masking with BundleMaskLo identifies an address's bundle offset.
  const intptr_t BundleMaskLo;
  // Masking with BundleMaskHi identifies an address's bundle.
  const intptr_t BundleMaskHi;
  intptr_t SizeSnapshotPre = 0;
  intptr_t SizeSnapshotPost = 0;
};

} // end of anonymous namespace
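
// To make the mask arithmetic above concrete (illustrative numbers): with
// getBundleAlignLog2Bytes() == 5, BundleSize is 32, BundleMaskLo is 0x1f, and
// BundleMaskHi is ~0x1f. A buffer offset of 0x64 then has bundle offset
// 0x64 & 0x1f == 4 and belongs to the bundle starting at 0x64 & ~0x1f == 0x60,
// so a locked sequence starting there has 32 - 4 = 28 bytes available before
// padToNextBundle() would need to insert nops.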
void CfgNode::emitIAS(Cfg *Func) const {
  Func->setCurrentNode(this);
  Assembler *Asm = Func->getAssembler<>();
  // TODO(stichnot): When sandboxing, defer binding the node label until just
  // before the first instruction is emitted, to reduce the chance that a
  // padding nop is a branch target.
  Asm->bindCfgNodeLabel(getIndex());
  for (const Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    // Emitting a Phi instruction should cause an error.
    I.emitIAS(Func);
  }

  // Do the simple emission if not sandboxed.
  if (!Func->getContext()->getFlags().getUseSandboxing()) {
    for (const Inst &I : Insts) {
      if (!I.isDeleted() && !I.isRedundantAssign()) {
        I.emitIAS(Func);
        updateStats(Func, &I);
      }
    }
    return;
  }

  // The remainder of the function handles emission with sandboxing. There are
  // explicit bundle_lock regions delimited by bundle_lock and bundle_unlock
  // instructions. All other instructions are treated as an implicit
  // one-instruction bundle_lock region. Emission is done twice for each
  // bundle_lock region. The first pass is a preliminary pass, after which we
  // can figure out what nop padding is needed, then roll back, and make the
  // final pass.
  //
  // Ideally, the first pass would be speculative and the second pass would
  // only be done if nop padding were needed, but the structure of the
  // integrated assembler makes it hard to roll back the state of label
  // bindings, label links, and relocation fixups. Instead, the first pass just
  // disables all mutation of that state.
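  //
  // Sketch of the two-pass flow for an explicit region (illustrative):
  //   bundle_lock      ; pass 1: snapshot the buffer size, enter
  //                    ; preliminary (no-mutation) mode
  //   inst1, inst2 ... ; pass 1: emit only to measure the encoded size
  //   bundle_unlock    ; roll back to the snapshot, pad with nops if the
  //                    ; sequence crossed a bundle boundary, then rewind to
  //                    ; the bundle_lock and re-emit the region for real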
  BundleEmitHelper Helper(Asm, Func->getTarget(), Insts);
  InstList::const_iterator End = Insts.end();
  // Retrying indicates that we had to roll back to the bundle_lock instruction
  // to apply padding before the bundle_lock sequence.
  bool Retrying = false;
  for (InstList::const_iterator I = Insts.begin(); I != End; ++I) {
    if (I->isDeleted() || I->isRedundantAssign())
      continue;

    if (llvm::isa<InstBundleLock>(I)) {
      // Set up the initial bundle_lock state. This should not happen while
      // retrying, because the retry rolls back to the instruction following
      // the bundle_lock instruction.
      assert(!Retrying);
      Helper.enterBundleLock(I);
      continue;
    }

    if (llvm::isa<InstBundleUnlock>(I)) {
      Helper.enterBundleUnlock();
      if (Retrying) {
        // Make sure all instructions are in the same bundle.
        assert(Helper.isSameBundle());
        // If align_to_end is specified, make sure the next instruction begins
        // the bundle.
        assert(!Helper.isAlignToEnd() || Helper.getPostAlignment() == 0);
        Helper.leaveBundleLockRegion();
        Retrying = false;
      } else {
        // This is the first pass, so roll back for the retry pass.
        Helper.rollback();
        // Pad to the next bundle if the instruction sequence crossed a bundle
        // boundary.
        Helper.padToNextBundle();
        // Insert additional padding to make AlignToEnd work.
        Helper.padForAlignToEnd();
        // Prepare for the retry pass after padding is done.
        Retrying = true;
        I = Helper.getBundleLockStart();
      }
      continue;
    }

    // I points to a non bundle_lock/bundle_unlock instruction.
    if (Helper.isInBundleLockRegion()) {
      I->emitIAS(Func);
      // Only update stats during the final pass.
      if (Retrying)
        updateStats(Func, I);
    } else {
      // Treat it as though there were an implicit bundle_lock and
      // bundle_unlock wrapping the instruction.
      Helper.enterBundleLock(I);
      I->emitIAS(Func);
      Helper.enterBundleUnlock();
      Helper.rollback();
      Helper.padToNextBundle();
      I->emitIAS(Func);
      updateStats(Func, I);
      Helper.leaveBundleLockRegion();
    }
  }

  // Don't allow bundle locking across basic blocks, to keep the backtracking
  // mechanism simple.
  assert(!Helper.isInBundleLockRegion());
}
void CfgNode::dump(Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Func->setCurrentNode(this);
  Ostream &Str = Func->getContext()->getStrDump();
  Liveness *Liveness = Func->getLiveness();
  if (Func->isVerbose(IceV_Instructions) || Func->isVerbose(IceV_Loop))
    Str << getName() << ":\n";
  // Dump the loop nest depth.
  if (Func->isVerbose(IceV_Loop))
    Str << " // LoopNestDepth = " << getLoopNestDepth() << "\n";
  // Dump list of predecessor nodes.
  if (Func->isVerbose(IceV_Preds) && !InEdges.empty()) {
    Str << "    // preds = ";
    bool First = true;
    for (CfgNode *I : InEdges) {
      if (!First)
        Str << ", ";
      First = false;
      Str << "%" << I->getName();
    }
    Str << "\n";
  }
  // Dump the live-in variables.
  LivenessBV LiveIn;
  if (Liveness)
    LiveIn = Liveness->getLiveIn(this);
  if (Func->isVerbose(IceV_Liveness) && !LiveIn.empty()) {
    Str << "    // LiveIn:";
    for (SizeT i = 0; i < LiveIn.size(); ++i) {
      if (LiveIn[i]) {
        Variable *Var = Liveness->getVariable(i, this);
        Str << " %" << Var->getName(Func);
        if (Func->isVerbose(IceV_RegOrigins) && Var->hasReg()) {
          Str << ":"
              << Func->getTarget()->getRegName(Var->getRegNum(),
                                               Var->getType());
        }
      }
    }
    Str << "\n";
  }
  // Dump each instruction.
  if (Func->isVerbose(IceV_Instructions)) {
    for (const Inst &I : Phis)
      I.dumpDecorated(Func);
    for (const Inst &I : Insts)
      I.dumpDecorated(Func);
  }
  // Dump the live-out variables.
  LivenessBV LiveOut;
  if (Liveness)
    LiveOut = Liveness->getLiveOut(this);
  if (Func->isVerbose(IceV_Liveness) && !LiveOut.empty()) {
    Str << "    // LiveOut:";
    for (SizeT i = 0; i < LiveOut.size(); ++i) {
      if (LiveOut[i]) {
        Variable *Var = Liveness->getVariable(i, this);
        Str << " %" << Var->getName(Func);
        if (Func->isVerbose(IceV_RegOrigins) && Var->hasReg()) {
          Str << ":"
              << Func->getTarget()->getRegName(Var->getRegNum(),
                                               Var->getType());
        }
      }
    }
    Str << "\n";
  }
  // Dump list of successor nodes.
  if (Func->isVerbose(IceV_Succs)) {
    Str << "    // succs = ";
    bool First = true;
    for (CfgNode *I : OutEdges) {
      if (!First)
        Str << ", ";
      First = false;
      Str << "%" << I->getName();
    }
    Str << "\n";
  }
}
void CfgNode::profileExecutionCount(VariableDeclaration *Var) {
  constexpr char RMW_I64[] = "llvm.nacl.atomic.rmw.i64";

  GlobalContext *Context = Func->getContext();

  bool BadIntrinsic = false;
  const Intrinsics::FullIntrinsicInfo *Info =
      Context->getIntrinsicsInfo().find(RMW_I64, BadIntrinsic);
  assert(!BadIntrinsic);
  assert(Info != nullptr);

  Operand *RMWI64Name = Context->getConstantExternSym(RMW_I64);
  constexpr RelocOffsetT Offset = 0;
  constexpr bool SuppressMangling = true;
  Constant *Counter =
      Context->getConstantSym(Offset, Var->getName(), SuppressMangling);
  Constant *AtomicRMWOp = Context->getConstantInt32(Intrinsics::AtomicAdd);
  Constant *One = Context->getConstantInt64(1);
  Constant *OrderAcquireRelease =
      Context->getConstantInt32(Intrinsics::MemoryOrderAcquireRelease);

  auto *Instr = InstIntrinsicCall::create(
      Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info);
  Instr->addArg(AtomicRMWOp);
  Instr->addArg(Counter);
  Instr->addArg(One);
  Instr->addArg(OrderAcquireRelease);
  Insts.push_front(Instr);
}

} // end of namespace Ice