//===- subzero/src/IceTargetLowering.cpp - Basic lowering implementation --===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements the skeleton of the TargetLowering class.
///
/// Specifically this invokes the appropriate lowering method for a given
/// instruction kind and drives global register allocation. It also implements
/// the non-deleted instruction iteration in LoweringContext.
///
//===----------------------------------------------------------------------===//
19 #include "IceTargetLowering.h"
21 #include "IceBitVector.h"
22 #include "IceCfg.h" // setError()
23 #include "IceCfgNode.h"
24 #include "IceGlobalContext.h"
25 #include "IceGlobalInits.h"
26 #include "IceInstVarIter.h"
27 #include "IceOperand.h"
28 #include "IceRegAlloc.h"
#define TARGET_LOWERING_CLASS_FOR(t) Target_##t

// We prevent target-specific implementation details from leaking outside their
// implementations by forbidding #include of target-specific header files
// anywhere outside their own files. To create target-specific objects
// (TargetLowering, TargetDataLowering, and TargetHeaderLowering) we use the
// following named constructors. For reference, each target Foo needs to
// implement the following named constructors and initializer:
//
// namespace Foo {
//   unique_ptr<Ice::TargetLowering> createTargetLowering(Ice::Cfg *);
//   unique_ptr<Ice::TargetDataLowering>
//       createTargetDataLowering(Ice::GlobalContext*);
//   unique_ptr<Ice::TargetHeaderLowering>
//       createTargetHeaderLowering(Ice::GlobalContext *);
//   void staticInit(::Ice::GlobalContext *);
//   bool shouldBePooled(const ::Ice::Constant *);
// }
#define SUBZERO_TARGET(X)                                                      \
  namespace X {                                                                \
  std::unique_ptr<::Ice::TargetLowering>                                       \
  createTargetLowering(::Ice::Cfg *Func);                                      \
  std::unique_ptr<::Ice::TargetDataLowering>                                   \
  createTargetDataLowering(::Ice::GlobalContext *Ctx);                         \
  std::unique_ptr<::Ice::TargetHeaderLowering>                                 \
  createTargetHeaderLowering(::Ice::GlobalContext *Ctx);                       \
  void staticInit(::Ice::GlobalContext *Ctx);                                  \
  bool shouldBePooled(const ::Ice::Constant *C);                               \
  } // end of namespace X
#include "SZTargets.def"
#undef SUBZERO_TARGET

namespace Ice {

void LoweringContext::init(CfgNode *N) {
  Node = N;
  End = getNode()->getInsts().end();
  rewind();
  advanceForward(Next);
}

void LoweringContext::rewind() {
  Begin = getNode()->getInsts().begin();
  Cur = Begin;
  skipDeleted(Cur);
  Next = Cur;
  availabilityReset();
}

void LoweringContext::insert(Inst *Instr) {
  getNode()->getInsts().insert(Next, Instr);
  LastInserted = Instr;
}

void LoweringContext::skipDeleted(InstList::iterator &I) const {
  while (I != End && I->isDeleted())
    ++I;
}

void LoweringContext::advanceForward(InstList::iterator &I) const {
  if (I != End) {
    ++I;
    skipDeleted(I);
  }
}

Inst *LoweringContext::getLastInserted() const {
  assert(LastInserted);
  return LastInserted;
}

void LoweringContext::availabilityReset() {
  LastDest = nullptr;
  LastSrc = nullptr;
}

void LoweringContext::availabilityUpdate() {
  availabilityReset();
  Inst *Instr = LastInserted;
  if (Instr == nullptr)
    return;
  if (!Instr->isVarAssign())
    return;
  // Since isVarAssign() is true, the source operand must be a Variable.
  LastDest = Instr->getDest();
  LastSrc = llvm::cast<Variable>(Instr->getSrc(0));
}

Variable *LoweringContext::availabilityGet(Operand *Src) const {
  assert(Src);
  if (Src == LastDest)
    return LastSrc;
  return nullptr;
}
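
// For reference (a restatement of the code above, not new behavior): after the
// context inserts a simple variable assignment "A = B", availabilityUpdate()
// records A in LastDest and B in LastSrc, so a subsequent availabilityGet(A)
// returns B and the caller can use B directly. Inserting any other kind of
// instruction clears this one-entry cache.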

namespace {

void printRegisterSet(Ostream &Str, const SmallBitVector &Bitset,
                      std::function<std::string(RegNumT)> getRegName,
                      const std::string &LineIndentString) {
  constexpr size_t RegistersPerLine = 16;
  size_t Count = 0;
  for (RegNumT RegNum : RegNumBVIter(Bitset)) {
    if (Count == 0) {
      Str << LineIndentString;
    } else {
      Str << ",";
    }
    if (Count > 0 && Count % RegistersPerLine == 0)
      Str << "\n" << LineIndentString;
    ++Count;
    Str << getRegName(RegNum);
  }
  if (Count)
    Str << "\n";
}

// Splits "<class>:<reg>" into "<class>" plus "<reg>". If there is no <class>
// component, the result is "" plus "<reg>".
void splitToClassAndName(const std::string &RegName, std::string *SplitRegClass,
                         std::string *SplitRegName) {
  constexpr const char Separator[] = ":";
  constexpr size_t SeparatorWidth = llvm::array_lengthof(Separator) - 1;
  size_t Pos = RegName.find(Separator);
  if (Pos == std::string::npos) {
    *SplitRegClass = "";
    *SplitRegName = RegName;
  } else {
    *SplitRegClass = RegName.substr(0, Pos);
    *SplitRegName = RegName.substr(Pos + SeparatorWidth);
  }
}
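
// Illustrative usage (hypothetical class/register names): given "gpr:eax",
// splitToClassAndName() produces SplitRegClass == "gpr" and SplitRegName ==
// "eax"; given plain "eax", it produces SplitRegClass == "" and SplitRegName
// == "eax".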

LLVM_ATTRIBUTE_NORETURN void badTargetFatalError(TargetArch Target) {
  llvm::report_fatal_error("Unsupported target: " +
                           std::string(targetArchString(Target)));
}

} // end of anonymous namespace

void TargetLowering::filterTypeToRegisterSet(
    GlobalContext *Ctx, int32_t NumRegs, SmallBitVector TypeToRegisterSet[],
    size_t TypeToRegisterSetSize,
    std::function<std::string(RegNumT)> getRegName,
    std::function<const char *(RegClass)> getRegClassName) {
  std::vector<SmallBitVector> UseSet(TypeToRegisterSetSize,
                                     SmallBitVector(NumRegs));
  std::vector<SmallBitVector> ExcludeSet(TypeToRegisterSetSize,
                                         SmallBitVector(NumRegs));

  std::unordered_map<std::string, RegNumT> RegNameToIndex;
  for (int32_t RegIndex = 0; RegIndex < NumRegs; ++RegIndex) {
    const auto RegNum = RegNumT::fromInt(RegIndex);
    RegNameToIndex[getRegName(RegNum)] = RegNum;
  }

  std::vector<std::string> BadRegNames;

  // The processRegList function iterates across the RegNames vector. Each
  // entry in the vector is a string of the form "<reg>" or "<class>:<reg>".
  // The register class and register number are computed, and the corresponding
  // bit is set in RegSet[][]. If "<class>:" is missing, then the bit is set
  // for all classes.
  auto processRegList = [&](const std::vector<std::string> &RegNames,
                            std::vector<SmallBitVector> &RegSet) {
    for (const std::string &RegClassAndName : RegNames) {
      std::string RClass;
      std::string RName;
      splitToClassAndName(RegClassAndName, &RClass, &RName);
      if (!RegNameToIndex.count(RName)) {
        BadRegNames.push_back(RName);
        continue;
      }
      const int32_t RegIndex = RegNameToIndex.at(RName);
      for (SizeT TypeIndex = 0; TypeIndex < TypeToRegisterSetSize;
           ++TypeIndex) {
        if (RClass.empty() ||
            RClass == getRegClassName(static_cast<RegClass>(TypeIndex))) {
          RegSet[TypeIndex][RegIndex] = TypeToRegisterSet[TypeIndex][RegIndex];
        }
      }
    }
  };

  processRegList(getFlags().getUseRestrictedRegisters(), UseSet);
  processRegList(getFlags().getExcludedRegisters(), ExcludeSet);

  if (!BadRegNames.empty()) {
    std::string Buffer;
    llvm::raw_string_ostream StrBuf(Buffer);
    StrBuf << "Unrecognized use/exclude registers:";
    for (const auto &RegName : BadRegNames)
      StrBuf << " " << RegName;
    llvm::report_fatal_error(StrBuf.str());
  }

  // Apply the use and exclude filters to each type's register set.
  for (size_t TypeIndex = 0; TypeIndex < TypeToRegisterSetSize; ++TypeIndex) {
    SmallBitVector *TypeBitSet = &TypeToRegisterSet[TypeIndex];
    SmallBitVector *UseBitSet = &UseSet[TypeIndex];
    SmallBitVector *ExcludeBitSet = &ExcludeSet[TypeIndex];
    if (UseBitSet->any())
      *TypeBitSet = *UseBitSet;
    (*TypeBitSet).reset(*ExcludeBitSet);
  }

  // Display filtered register sets, if requested.
  if (BuildDefs::dump() && NumRegs &&
      (getFlags().getVerbose() & IceV_AvailableRegs)) {
    Ostream &Str = Ctx->getStrDump();
    const std::string Indent = "  ";
    const std::string IndentTwice = Indent + Indent;
    Str << "Registers available for register allocation:\n";
    for (size_t TypeIndex = 0; TypeIndex < TypeToRegisterSetSize; ++TypeIndex) {
      Str << Indent << getRegClassName(static_cast<RegClass>(TypeIndex))
          << ":\n";
      printRegisterSet(Str, TypeToRegisterSet[TypeIndex], getRegName,
                       IndentTwice);
    }
    Str << "\n";
  }
}
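
// Illustrative effect (hypothetical register and class names): if the list
// from getFlags().getUseRestrictedRegisters() is {"eax", "gpr:ecx"} and the
// excluded list is {"edx"}, then every class is narrowed to eax (plus ecx for
// the class named "gpr"), edx is removed from all classes, and a register is
// only ever kept where the target's original TypeToRegisterSet already
// allowed it for that class.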

std::unique_ptr<TargetLowering>
TargetLowering::createLowering(TargetArch Target, Cfg *Func) {
  switch (Target) {
  default:
    badTargetFatalError(Target);
#define SUBZERO_TARGET(X)                                                      \
  case TARGET_LOWERING_CLASS_FOR(X):                                           \
    return ::X::createTargetLowering(Func);
#include "SZTargets.def"
#undef SUBZERO_TARGET
  }
}

void TargetLowering::staticInit(GlobalContext *Ctx) {
  const TargetArch Target = getFlags().getTargetArch();
  // Call the specified target's static initializer.
  switch (Target) {
  default:
    badTargetFatalError(Target);
#define SUBZERO_TARGET(X)                                                      \
  case TARGET_LOWERING_CLASS_FOR(X): {                                         \
    static bool InitGuard##X = false;                                          \
    if (InitGuard##X) {                                                        \
      return;                                                                  \
    }                                                                          \
    InitGuard##X = true;                                                       \
    ::X::staticInit(Ctx);                                                      \
  } break;
#include "SZTargets.def"
#undef SUBZERO_TARGET
  }
}

bool TargetLowering::shouldBePooled(const Constant *C) {
  const TargetArch Target = getFlags().getTargetArch();
  switch (Target) {
  default:
    return false;
#define SUBZERO_TARGET(X)                                                      \
  case TARGET_LOWERING_CLASS_FOR(X):                                           \
    return ::X::shouldBePooled(C);
#include "SZTargets.def"
#undef SUBZERO_TARGET
  }
}

TargetLowering::SandboxType
TargetLowering::determineSandboxTypeFromFlags(const ClFlags &Flags) {
  assert(!Flags.getUseSandboxing() || !Flags.getUseNonsfi());
  if (Flags.getUseNonsfi()) {
    return TargetLowering::ST_Nonsfi;
  }
  if (Flags.getUseSandboxing()) {
    return TargetLowering::ST_NaCl;
  }
  return TargetLowering::ST_None;
}

TargetLowering::TargetLowering(Cfg *Func)
    : Func(Func), Ctx(Func->getContext()),
      SandboxingType(determineSandboxTypeFromFlags(getFlags())) {}

TargetLowering::AutoBundle::AutoBundle(TargetLowering *Target,
                                       InstBundleLock::Option Option)
    : Target(Target), NeedSandboxing(getFlags().getUseSandboxing()) {
  assert(!Target->AutoBundling);
  Target->AutoBundling = true;
  if (NeedSandboxing) {
    Target->_bundle_lock(Option);
  }
}

TargetLowering::AutoBundle::~AutoBundle() {
  assert(Target->AutoBundling);
  Target->AutoBundling = false;
  if (NeedSandboxing) {
    Target->_bundle_unlock();
  }
}

void TargetLowering::genTargetHelperCalls() {
  TimerMarker T(TimerStack::TT_genHelpers, Func);
  Utils::BoolFlagSaver _(GeneratingTargetHelpers, true);
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    while (!Context.atEnd()) {
      PostIncrLoweringContext _(Context);
      genTargetHelperCallFor(Context.getCur());
    }
  }
}

void TargetLowering::doAddressOpt() {
  if (llvm::isa<InstLoad>(*Context.getCur()))
    doAddressOptLoad();
  else if (llvm::isa<InstStore>(*Context.getCur()))
    doAddressOptStore();
  Context.advanceCur();
  Context.advanceNext();
}

void TargetLowering::doNopInsertion(RandomNumberGenerator &RNG) {
  Inst *I = Context.getCur();
  bool ShouldSkip = llvm::isa<InstFakeUse>(I) || llvm::isa<InstFakeDef>(I) ||
                    llvm::isa<InstFakeKill>(I) || I->isRedundantAssign() ||
                    I->isDeleted();
  if (!ShouldSkip) {
    int Probability = getFlags().getNopProbabilityAsPercentage();
    for (int I = 0; I < getFlags().getMaxNopsPerInstruction(); ++I) {
      randomlyInsertNop(Probability / 100.0, RNG);
    }
  }
}

// Lowers a single instruction according to the information in Context, by
// checking the Context.Cur instruction kind and calling the appropriate
// lowering method. The lowering method should insert target instructions at
// the Cur.Next insertion point, and should not delete the Context.Cur
// instruction or advance Context.Cur.
//
// The lowering method may look ahead in the instruction stream as desired, and
// lower additional instructions in conjunction with the current one, for
// example fusing a compare and branch. If it does, it should advance
// Context.Cur to point to the next non-deleted instruction to process, and it
// should delete any additional instructions it consumes.
void TargetLowering::lower() {
  assert(!Context.atEnd());
  Inst *Instr = Context.getCur();
  Instr->deleteIfDead();
  if (!Instr->isDeleted() && !llvm::isa<InstFakeDef>(Instr) &&
      !llvm::isa<InstFakeUse>(Instr)) {
    // Mark the current instruction as deleted before lowering, otherwise the
    // Dest variable will likely get marked as non-SSA. See
    // Variable::setDefinition(). However, just pass-through FakeDef and
    // FakeUse instructions that might have been inserted prior to lowering.
    Instr->setDeleted();
    switch (Instr->getKind()) {
    case Inst::Alloca:
      lowerAlloca(llvm::cast<InstAlloca>(Instr));
      break;
    case Inst::Arithmetic:
      lowerArithmetic(llvm::cast<InstArithmetic>(Instr));
      break;
    case Inst::Assign:
      lowerAssign(llvm::cast<InstAssign>(Instr));
      break;
    case Inst::Br:
      lowerBr(llvm::cast<InstBr>(Instr));
      break;
    case Inst::Breakpoint:
      lowerBreakpoint(llvm::cast<InstBreakpoint>(Instr));
      break;
    case Inst::Call:
      lowerCall(llvm::cast<InstCall>(Instr));
      break;
    case Inst::Cast:
      lowerCast(llvm::cast<InstCast>(Instr));
      break;
    case Inst::ExtractElement:
      lowerExtractElement(llvm::cast<InstExtractElement>(Instr));
      break;
    case Inst::Fcmp:
      lowerFcmp(llvm::cast<InstFcmp>(Instr));
      break;
    case Inst::Icmp:
      lowerIcmp(llvm::cast<InstIcmp>(Instr));
      break;
    case Inst::InsertElement:
      lowerInsertElement(llvm::cast<InstInsertElement>(Instr));
      break;
    case Inst::IntrinsicCall: {
      auto *Call = llvm::cast<InstIntrinsicCall>(Instr);
      if (Call->getIntrinsicInfo().ReturnsTwice)
        setCallsReturnsTwice(true);
      lowerIntrinsicCall(Call);
      break;
    }
    case Inst::Load:
      lowerLoad(llvm::cast<InstLoad>(Instr));
      break;
    case Inst::Phi:
      lowerPhi(llvm::cast<InstPhi>(Instr));
      break;
    case Inst::Ret:
      lowerRet(llvm::cast<InstRet>(Instr));
      break;
    case Inst::Select:
      lowerSelect(llvm::cast<InstSelect>(Instr));
      break;
    case Inst::ShuffleVector:
      lowerShuffleVector(llvm::cast<InstShuffleVector>(Instr));
      break;
    case Inst::Store:
      lowerStore(llvm::cast<InstStore>(Instr));
      break;
    case Inst::Switch:
      lowerSwitch(llvm::cast<InstSwitch>(Instr));
      break;
    case Inst::Unreachable:
      lowerUnreachable(llvm::cast<InstUnreachable>(Instr));
      break;
    default:
      lowerOther(Instr);
      break;
    }

    postLower();
  }

  Context.advanceCur();
  Context.advanceNext();
}
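
// A minimal sketch of a target's lowering method as seen from this file
// (hypothetical target code; the helper names legalize and _mov are
// assumptions about a typical target, not definitions in this file). The
// method emits target instructions at the Next insertion point via
// Context.insert<>() and leaves Context.Cur alone:
//
//   void TargetFoo::lowerAssign(const InstAssign *Instr) {
//     Variable *Dest = Instr->getDest();
//     Operand *Src = legalize(Instr->getSrc(0)); // target-specific helper
//     _mov(Dest, Src); // wrapper around Context.insert<...>(...)
//   }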

void TargetLowering::lowerInst(CfgNode *Node, InstList::iterator Next,
                               InstHighLevel *Instr) {
  // TODO(stichnot): Consider modifying the design/implementation to avoid
  // multiple init() calls when using lowerInst() to lower several instructions
  // in the same node.
  Context.init(Node);
  Context.setNext(Next);
  Context.insert(Instr);
  --Next;
  assert(&*Next == Instr);
  Context.setCur(Next);
  lower();
}

void TargetLowering::lowerOther(const Inst *Instr) {
  (void)Instr;
  Func->setError("Can't lower unsupported instruction type");
}

// Drives register allocation, allowing all physical registers (except perhaps
// for the frame pointer) to be allocated. This set of registers could
// potentially be parameterized if we want to restrict registers e.g. for
// performance testing.
void TargetLowering::regAlloc(RegAllocKind Kind) {
  TimerMarker T(TimerStack::TT_regAlloc, Func);
  LinearScan LinearScan(Func);
  RegSetMask RegInclude = RegSet_None;
  RegSetMask RegExclude = RegSet_None;
  RegInclude |= RegSet_CallerSave;
  RegInclude |= RegSet_CalleeSave;
  if (hasFramePointer())
    RegExclude |= RegSet_FramePointer;
  SmallBitVector RegMask = getRegisterSet(RegInclude, RegExclude);
  bool Repeat = (Kind == RAK_Global && getFlags().getRepeatRegAlloc());
  do {
    LinearScan.init(Kind);
    LinearScan.scan(RegMask, getFlags().getRandomizeRegisterAllocation());
    if (!LinearScan.hasEvictions())
      Repeat = false;
    Kind = RAK_SecondChance;
  } while (Repeat);
  // TODO(stichnot): Run the register allocator one more time to do stack slot
  // coalescing. The idea would be to initialize the Unhandled list with the
  // set of Variables that have no register and a non-empty live range, and
  // model an infinite number of registers. Maybe use the register aliasing
  // mechanism to get better packing of narrower slots.
}

void TargetLowering::markRedefinitions() {
  // Find (non-SSA) instructions where the Dest variable appears in some source
  // operand, and set the IsDestRedefined flag to keep liveness analysis
  // consistent.
  for (auto Instr = Context.getCur(), E = Context.getNext(); Instr != E;
       ++Instr) {
    if (Instr->isDeleted())
      continue;
    Variable *Dest = Instr->getDest();
    if (Dest == nullptr)
      continue;
    FOREACH_VAR_IN_INST(Var, *Instr) {
      if (Var == Dest) {
        Instr->setDestRedefined();
        break;
      }
    }
  }
}

void TargetLowering::addFakeDefUses(const Inst *Instr) {
  FOREACH_VAR_IN_INST(Var, *Instr) {
    if (auto *Var64 = llvm::dyn_cast<Variable64On32>(Var)) {
      Context.insert<InstFakeUse>(Var64->getLo());
      Context.insert<InstFakeUse>(Var64->getHi());
    } else {
      Context.insert<InstFakeUse>(Var);
    }
  }
  Variable *Dest = Instr->getDest();
  if (Dest == nullptr)
    return;
  if (auto *Var64 = llvm::dyn_cast<Variable64On32>(Dest)) {
    Context.insert<InstFakeDef>(Var64->getLo());
    Context.insert<InstFakeDef>(Var64->getHi());
  } else {
    Context.insert<InstFakeDef>(Dest);
  }
}

void TargetLowering::sortVarsByAlignment(VarList &Dest,
                                         const VarList &Source) const {
  Dest = Source;
  // Instead of std::sort, we could do a bucket sort with log2(alignment) as
  // the buckets, if performance is an issue.
  std::sort(Dest.begin(), Dest.end(),
            [this](const Variable *V1, const Variable *V2) {
              return typeWidthInBytesOnStack(V1->getType()) >
                     typeWidthInBytesOnStack(V2->getType());
            });
}
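
// A sketch of the bucket-sort alternative mentioned in the comment above
// (illustrative only; MaxLog2 is a hypothetical bound on log2 of the widest
// stack type):
//
//   CfgVector<VarList> Buckets(MaxLog2 + 1);
//   for (Variable *Var : Source)
//     Buckets[llvm::Log2_32(typeWidthInBytesOnStack(Var->getType()))]
//         .push_back(Var);
//   Dest.clear();
//   for (auto It = Buckets.rbegin(); It != Buckets.rend(); ++It)
//     Dest.insert(Dest.end(), It->begin(), It->end());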

void TargetLowering::getVarStackSlotParams(
    VarList &SortedSpilledVariables, SmallBitVector &RegsUsed,
    size_t *GlobalsSize, size_t *SpillAreaSizeBytes,
    uint32_t *SpillAreaAlignmentBytes, uint32_t *LocalsSlotsAlignmentBytes,
    std::function<bool(Variable *)> TargetVarHook) {
  const VariablesMetadata *VMetadata = Func->getVMetadata();
  BitVector IsVarReferenced(Func->getNumVariables());
  for (CfgNode *Node : Func->getNodes()) {
    for (Inst &Instr : Node->getInsts()) {
      if (Instr.isDeleted())
        continue;
      if (const Variable *Var = Instr.getDest())
        IsVarReferenced[Var->getIndex()] = true;
      FOREACH_VAR_IN_INST(Var, Instr) {
        IsVarReferenced[Var->getIndex()] = true;
      }
    }
  }

  // If SimpleCoalescing is false, each variable without a register gets its
  // own unique stack slot, which leads to large stack frames. If
  // SimpleCoalescing is true, then each "global" variable without a register
  // gets its own slot, but "local" variable slots are reused across basic
  // blocks. E.g., if A and B are local to block 1 and C is local to block 2,
  // then C may share a slot with A or B.
  //
  // We cannot coalesce stack slots if this function calls a "returns twice"
  // function. In that case, basic blocks may be revisited, and variables local
  // to those basic blocks are actually live until after the called function
  // returns a second time.
  const bool SimpleCoalescing = !callsReturnsTwice();

  CfgVector<size_t> LocalsSize(Func->getNumNodes());
  const VarList &Variables = Func->getVariables();
  VarList SpilledVariables;
  for (Variable *Var : Variables) {
    if (Var->hasReg()) {
      // Don't consider a rematerializable variable to be an actual register use
      // (specifically of the frame pointer). Otherwise, the prolog may decide
      // to save the frame pointer twice - once because of the explicit need for
      // a frame pointer, and once because of an active use of a callee-save
      // register.
      if (!Var->isRematerializable())
        RegsUsed[Var->getRegNum()] = true;
      continue;
    }
    // An argument either does not need a stack slot (if passed in a register)
    // or already has one (if passed on the stack).
    if (Var->getIsArg())
      continue;
    // An unreferenced variable doesn't need a stack slot.
    if (!IsVarReferenced[Var->getIndex()])
      continue;
    // A target-specific variable may end up sharing stack slots, so it does
    // not need accounting here.
    if (TargetVarHook(Var))
      continue;
    SpilledVariables.push_back(Var);
  }

  SortedSpilledVariables.reserve(SpilledVariables.size());
  sortVarsByAlignment(SortedSpilledVariables, SpilledVariables);

  for (Variable *Var : SortedSpilledVariables) {
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    // We have sorted by alignment, so the first variable we encounter that is
    // located in each area determines the max alignment for the area.
    if (!*SpillAreaAlignmentBytes)
      *SpillAreaAlignmentBytes = Increment;
    if (SimpleCoalescing && VMetadata->isTracked(Var)) {
      if (VMetadata->isMultiBlock(Var)) {
        *GlobalsSize += Increment;
      } else {
        SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
        LocalsSize[NodeIndex] += Increment;
        if (LocalsSize[NodeIndex] > *SpillAreaSizeBytes)
          *SpillAreaSizeBytes = LocalsSize[NodeIndex];
        if (!*LocalsSlotsAlignmentBytes)
          *LocalsSlotsAlignmentBytes = Increment;
      }
    } else {
      *SpillAreaSizeBytes += Increment;
    }
  }
  // For testing legalization of large stack offsets on targets with limited
  // offset bits in instruction encodings, add some padding.
  *SpillAreaSizeBytes += getFlags().getTestStackExtra();
}

void TargetLowering::alignStackSpillAreas(uint32_t SpillAreaStartOffset,
                                          uint32_t SpillAreaAlignmentBytes,
                                          size_t GlobalsSize,
                                          uint32_t LocalsSlotsAlignmentBytes,
                                          uint32_t *SpillAreaPaddingBytes,
                                          uint32_t *LocalsSlotsPaddingBytes) {
  if (SpillAreaAlignmentBytes) {
    uint32_t PaddingStart = SpillAreaStartOffset;
    uint32_t SpillAreaStart =
        Utils::applyAlignment(PaddingStart, SpillAreaAlignmentBytes);
    *SpillAreaPaddingBytes = SpillAreaStart - PaddingStart;
  }

  // If there are separate globals and locals areas, make sure the locals area
  // is aligned by padding the end of the globals area.
  if (LocalsSlotsAlignmentBytes) {
    uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize;
    GlobalsAndSubsequentPaddingSize =
        Utils::applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes);
    *LocalsSlotsPaddingBytes = GlobalsAndSubsequentPaddingSize - GlobalsSize;
  }
}
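
// Worked example (illustrative values only): with SpillAreaStartOffset == 20
// and SpillAreaAlignmentBytes == 16, applyAlignment rounds 20 up to 32, so
// *SpillAreaPaddingBytes becomes 12. Similarly, GlobalsSize == 24 with
// LocalsSlotsAlignmentBytes == 16 rounds up to 32, leaving 8 bytes of
// *LocalsSlotsPaddingBytes between the globals and locals areas.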

void TargetLowering::assignVarStackSlots(VarList &SortedSpilledVariables,
                                         size_t SpillAreaPaddingBytes,
                                         size_t SpillAreaSizeBytes,
                                         size_t GlobalsAndSubsequentPaddingSize,
                                         bool UsesFramePointer) {
  const VariablesMetadata *VMetadata = Func->getVMetadata();
  // For testing legalization of large stack offsets on targets with limited
  // offset bits in instruction encodings, add some padding. This assumes that
  // SpillAreaSizeBytes has accounted for the extra test padding. When
  // UsesFramePointer is true, the offset depends on the padding, not just the
  // SpillAreaSizeBytes. On the other hand, when UsesFramePointer is false, the
  // offsets depend on the gap between SpillAreaSizeBytes and
  // SpillAreaPaddingBytes, so we don't increment that.
  size_t TestPadding = getFlags().getTestStackExtra();
  if (UsesFramePointer)
    SpillAreaPaddingBytes += TestPadding;
  size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
  size_t NextStackOffset = SpillAreaPaddingBytes;
  CfgVector<size_t> LocalsSize(Func->getNumNodes());
  const bool SimpleCoalescing = !callsReturnsTwice();

  for (Variable *Var : SortedSpilledVariables) {
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing && VMetadata->isTracked(Var)) {
      if (VMetadata->isMultiBlock(Var)) {
        GlobalsSpaceUsed += Increment;
        NextStackOffset = GlobalsSpaceUsed;
      } else {
        SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
        LocalsSize[NodeIndex] += Increment;
        NextStackOffset = SpillAreaPaddingBytes +
                          GlobalsAndSubsequentPaddingSize +
                          LocalsSize[NodeIndex];
      }
    } else {
      NextStackOffset += Increment;
    }
    if (UsesFramePointer)
      Var->setStackOffset(-NextStackOffset);
    else
      Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
  }
}

InstCall *TargetLowering::makeHelperCall(RuntimeHelper FuncID, Variable *Dest,
                                         SizeT MaxSrcs) {
  constexpr bool HasTailCall = false;
  Constant *CallTarget = Ctx->getRuntimeHelperFunc(FuncID);
  InstCall *Call =
      InstCall::create(Func, MaxSrcs, Dest, CallTarget, HasTailCall);
  return Call;
}

bool TargetLowering::shouldOptimizeMemIntrins() {
  return getFlags().getOptLevel() >= Opt_1 || getFlags().getForceMemIntrinOpt();
}

void TargetLowering::scalarizeArithmetic(InstArithmetic::OpKind Kind,
                                         Variable *Dest, Operand *Src0,
                                         Operand *Src1) {
  scalarizeInstruction(
      Dest, [this, Kind](Variable *Dest, Operand *Src0, Operand *Src1) {
        return Context.insert<InstArithmetic>(Kind, Dest, Src0, Src1);
      },
      Src0, Src1);
}

void TargetLowering::emitWithoutPrefix(const ConstantRelocatable *C,
                                       const char *Suffix) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  const std::string &EmitStr = C->getEmitString();
  if (!EmitStr.empty()) {
    // C has a custom emit string, so we use it instead of the canonical
    // Name + Offset form.
    Str << EmitStr;
    return;
  }
  Str << C->getName() << Suffix;
  RelocOffsetT Offset = C->getOffset();
  if (Offset) {
    if (Offset > 0)
      Str << "+";
    Str << Offset;
  }
}

std::unique_ptr<TargetDataLowering>
TargetDataLowering::createLowering(GlobalContext *Ctx) {
  TargetArch Target = getFlags().getTargetArch();
  switch (Target) {
  default:
    badTargetFatalError(Target);
#define SUBZERO_TARGET(X)                                                      \
  case TARGET_LOWERING_CLASS_FOR(X):                                           \
    return ::X::createTargetDataLowering(Ctx);
#include "SZTargets.def"
#undef SUBZERO_TARGET
  }
}

TargetDataLowering::~TargetDataLowering() = default;

namespace {

// dataSectionSuffix decides whether to use SectionSuffix or VarName as the
// data section suffix. Essentially, when using separate data sections for
// globals, SectionSuffix is not necessary.
std::string dataSectionSuffix(const std::string &SectionSuffix,
                              const std::string &VarName,
                              const bool DataSections) {
  if (SectionSuffix.empty() && !DataSections) {
    return "";
  }

  if (DataSections) {
    // With data sections we don't need to use the SectionSuffix.
    return "." + VarName;
  }

  assert(!SectionSuffix.empty());
  return "." + SectionSuffix;
}

} // end of anonymous namespace
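
// For illustration (hypothetical inputs): with DataSections == true and
// VarName == "MyVar", dataSectionSuffix("", "MyVar", true) yields ".MyVar", so
// the global lands in a section like ".data.MyVar"; with DataSections == false
// and SectionSuffix == "foo" it yields ".foo"; with an empty suffix and data
// sections disabled it yields "" and the default section name is used.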

void TargetDataLowering::emitGlobal(const VariableDeclaration &Var,
                                    const std::string &SectionSuffix) {
  if (!BuildDefs::dump())
    return;

  // If external and not initialized, this must be a cross test. Don't generate
  // a declaration for such cases.
  const bool IsExternal = Var.isExternal() || getFlags().getDisableInternal();
  if (IsExternal && !Var.hasInitializer())
    return;

  Ostream &Str = Ctx->getStrEmit();
  const bool HasNonzeroInitializer = Var.hasNonzeroInitializer();
  const bool IsConstant = Var.getIsConstant();
  const SizeT Size = Var.getNumBytes();
  const std::string Name = Var.getName().toString();

  Str << "\t.type\t" << Name << ",%object\n";

  const bool UseDataSections = getFlags().getDataSections();
  const bool UseNonsfi = getFlags().getUseNonsfi();
  const std::string Suffix =
      dataSectionSuffix(SectionSuffix, Name, UseDataSections);
  if (IsConstant && UseNonsfi)
    Str << "\t.section\t.data.rel.ro" << Suffix << ",\"aw\",%progbits\n";
  else if (IsConstant)
    Str << "\t.section\t.rodata" << Suffix << ",\"a\",%progbits\n";
  else if (HasNonzeroInitializer)
    Str << "\t.section\t.data" << Suffix << ",\"aw\",%progbits\n";
  else
    Str << "\t.section\t.bss" << Suffix << ",\"aw\",%nobits\n";

  if (IsExternal)
    Str << "\t.globl\t" << Name << "\n";

  const uint32_t Align = Var.getAlignment();
  if (Align > 1) {
    assert(llvm::isPowerOf2_32(Align));
    // Use the .p2align directive, since the .align N directive can either
    // interpret N as bytes, or power of 2 bytes, depending on the target.
    Str << "\t.p2align\t" << llvm::Log2_32(Align) << "\n";
  }

  Str << Name << ":\n";

  if (HasNonzeroInitializer) {
    for (const auto *Init : Var.getInitializers()) {
      switch (Init->getKind()) {
      case VariableDeclaration::Initializer::DataInitializerKind: {
        const auto &Data =
            llvm::cast<VariableDeclaration::DataInitializer>(Init)
                ->getContents();
        for (SizeT i = 0; i < Init->getNumBytes(); ++i) {
          Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
        }
        break;
      }
      case VariableDeclaration::Initializer::ZeroInitializerKind:
        Str << "\t.zero\t" << Init->getNumBytes() << "\n";
        break;
      case VariableDeclaration::Initializer::RelocInitializerKind: {
        const auto *Reloc =
            llvm::cast<VariableDeclaration::RelocInitializer>(Init);
        Str << "\t" << getEmit32Directive() << "\t";
        Str << Reloc->getDeclaration()->getName();
        if (Reloc->hasFixup()) {
          // TODO(jpp): this is ARM32 specific.
          Str << "(GOTOFF)";
        }
        if (RelocOffsetT Offset = Reloc->getOffset()) {
          if (Offset >= 0 || (Offset == INT32_MIN))
            Str << " + " << Offset;
          else
            Str << " - " << -Offset;
        }
        Str << "\n";
        break;
      }
      }
    }
  } else {
    // NOTE: for non-constant zero initializers, this is BSS (no bits), so an
    // ELF writer would not write to the file, and only track virtual offsets,
    // but the .s writer still needs this .zero and cannot simply use the .size
    // to advance offsets.
    Str << "\t.zero\t" << Size << "\n";
  }

  Str << "\t.size\t" << Name << ", " << Size << "\n";
}

std::unique_ptr<TargetHeaderLowering>
TargetHeaderLowering::createLowering(GlobalContext *Ctx) {
  TargetArch Target = getFlags().getTargetArch();
  switch (Target) {
  default:
    badTargetFatalError(Target);
#define SUBZERO_TARGET(X)                                                      \
  case TARGET_LOWERING_CLASS_FOR(X):                                           \
    return ::X::createTargetHeaderLowering(Ctx);
#include "SZTargets.def"
#undef SUBZERO_TARGET
  }
}

TargetHeaderLowering::~TargetHeaderLowering() = default;

} // end of namespace Ice