//===- subzero/src/IceTargetLowering.cpp - Basic lowering implementation --===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements the skeleton of the TargetLowering class, specifically
/// invoking the appropriate lowering method for a given instruction kind and
/// driving global register allocation. It also implements the non-deleted
/// instruction iteration in LoweringContext.
//
//===----------------------------------------------------------------------===//

#include "IceTargetLowering.h"

#include "IceAssemblerARM32.h"
#include "IceAssemblerMIPS32.h"
#include "IceAssemblerX8632.h"
#include "IceAssemblerX8664.h"
#include "IceCfg.h" // setError()
#include "IceCfgNode.h"
#include "IceGlobalInits.h"
#include "IceInstVarIter.h"
#include "IceOperand.h"
#include "IceRegAlloc.h"
#include "IceTargetLoweringARM32.h"
#include "IceTargetLoweringMIPS32.h"
#include "IceTargetLoweringX8632.h"
#include "IceTargetLoweringX8664.h"

namespace Ice {

void LoweringContext::init(CfgNode *N) {
  Node = N;
  End = getNode()->getInsts().end();
  rewind();
  advanceForward(Next);
}

void LoweringContext::rewind() {
  Begin = getNode()->getInsts().begin();
  Cur = Begin;
  skipDeleted(Cur);
  Next = Cur;
  availabilityReset();
}

void LoweringContext::insert(Inst *Inst) {
  getNode()->getInsts().insert(Next, Inst);
  LastInserted = Inst;
}

void LoweringContext::skipDeleted(InstList::iterator &I) const {
  while (I != End && I->isDeleted())
    ++I;
}

void LoweringContext::advanceForward(InstList::iterator &I) const {
  if (I != End) {
    ++I;
    skipDeleted(I);
  }
}

Inst *LoweringContext::getLastInserted() const {
  assert(LastInserted);
  return LastInserted;
}
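
// Availability tracking: availabilityUpdate() records the Dest/Src pair of
// the most recently inserted simple variable assignment, and
// availabilityGet(Src) returns the recorded source when Src is that
// destination, giving lowering a one-instruction window in which it may
// substitute the assignment's source for its destination.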
void LoweringContext::availabilityReset() {
  LastDest = nullptr;
  LastSrc = nullptr;
}

void LoweringContext::availabilityUpdate() {
  availabilityReset();
  Inst *Instr = LastInserted;
  if (Instr == nullptr)
    return;
  if (!Instr->isVarAssign())
    return;
  // Since isVarAssign() is true, the source operand must be a Variable.
  LastDest = Instr->getDest();
  LastSrc = llvm::cast<Variable>(Instr->getSrc(0));
}

Variable *LoweringContext::availabilityGet(Operand *Src) const {
  assert(Src);
  if (Src == LastDest)
    return LastSrc;
  return nullptr;
}

TargetLowering *TargetLowering::createLowering(TargetArch Target, Cfg *Func) {
#define SUBZERO_TARGET(X)                                                      \
  if (Target == Target_##X)                                                    \
    return Target##X::create(Func);
#include "llvm/Config/SZTargets.def"
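
  // SZTargets.def invokes SUBZERO_TARGET(X) once per configured target, so
  // the macro above unrolls into a chain of "if (Target == Target_X) return
  // ..." tests; control reaches the error path below only when no target
  // matches.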
  Func->setError("Unsupported target");
  return nullptr;
}

TargetLowering::TargetLowering(Cfg *Func)
    : Func(Func), Ctx(Func->getContext()), Context() {}

std::unique_ptr<Assembler> TargetLowering::createAssembler(TargetArch Target,
                                                           Cfg *Func) {
#define SUBZERO_TARGET(X)                                                      \
  if (Target == Target_##X)                                                    \
    return std::unique_ptr<Assembler>(new X::Assembler##X());
#include "llvm/Config/SZTargets.def"

  Func->setError("Unsupported target assembler");
  return nullptr;
}
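
// Dispatches address-mode optimization for the current instruction to the
// target-specific load or store hook, then advances the context past it.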
void TargetLowering::doAddressOpt() {
  if (llvm::isa<InstLoad>(*Context.getCur()))
    doAddressOptLoad();
  else if (llvm::isa<InstStore>(*Context.getCur()))
    doAddressOptStore();
  Context.advanceCur();
  Context.advanceNext();
}

void TargetLowering::doNopInsertion(RandomNumberGenerator &RNG) {
  Inst *I = Context.getCur();
  bool ShouldSkip = llvm::isa<InstFakeUse>(I) || llvm::isa<InstFakeDef>(I) ||
                    llvm::isa<InstFakeKill>(I) || I->isRedundantAssign() ||
                    I->isDeleted();
  if (!ShouldSkip) {
    int Probability = Ctx->getFlags().getNopProbabilityAsPercentage();
    for (int I = 0; I < Ctx->getFlags().getMaxNopsPerInstruction(); ++I) {
      randomlyInsertNop(Probability / 100.0, RNG);
    }
  }
}

// Lowers a single instruction according to the information in Context, by
// checking the Context.Cur instruction kind and calling the appropriate
// lowering method. The lowering method should insert target instructions at
// the Cur.Next insertion point, and should not delete the Context.Cur
// instruction or advance Context.Cur.
//
// The lowering method may look ahead in the instruction stream as desired, and
// lower additional instructions in conjunction with the current one, for
// example fusing a compare and branch. If it does, it should advance
// Context.Cur to point to the next non-deleted instruction to process, and it
// should delete any additional instructions it consumes.
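//
// As a sketch (not tied to any particular target's implementation): a target's
// lowerIcmp() might peek at the instruction after Context.Cur, and if it is a
// conditional branch on the compare's result, emit a fused compare-and-branch
// sequence, delete the branch instruction, and advance Context.Cur past it.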
void TargetLowering::lower() {
  assert(!Context.atEnd());
  Inst *Inst = Context.getCur();
  Inst->deleteIfDead();
  if (!Inst->isDeleted() && !llvm::isa<InstFakeDef>(Inst) &&
      !llvm::isa<InstFakeUse>(Inst)) {
    // Mark the current instruction as deleted before lowering, otherwise the
    // Dest variable will likely get marked as non-SSA. See
    // Variable::setDefinition(). However, just pass-through FakeDef and
    // FakeUse instructions that might have been inserted prior to lowering.
    Inst->setDeleted();
    switch (Inst->getKind()) {
    case Inst::Alloca:
      lowerAlloca(llvm::cast<InstAlloca>(Inst));
      break;
    case Inst::Arithmetic:
      lowerArithmetic(llvm::cast<InstArithmetic>(Inst));
      break;
    case Inst::Assign:
      lowerAssign(llvm::cast<InstAssign>(Inst));
      break;
    case Inst::Br:
      lowerBr(llvm::cast<InstBr>(Inst));
      break;
    case Inst::Call:
      lowerCall(llvm::cast<InstCall>(Inst));
      break;
    case Inst::Cast:
      lowerCast(llvm::cast<InstCast>(Inst));
      break;
    case Inst::ExtractElement:
      lowerExtractElement(llvm::cast<InstExtractElement>(Inst));
      break;
    case Inst::Fcmp:
      lowerFcmp(llvm::cast<InstFcmp>(Inst));
      break;
    case Inst::Icmp:
      lowerIcmp(llvm::cast<InstIcmp>(Inst));
      break;
    case Inst::InsertElement:
      lowerInsertElement(llvm::cast<InstInsertElement>(Inst));
      break;
    case Inst::IntrinsicCall: {
      InstIntrinsicCall *Call = llvm::cast<InstIntrinsicCall>(Inst);
      if (Call->getIntrinsicInfo().ReturnsTwice)
        setCallsReturnsTwice(true);
      lowerIntrinsicCall(Call);
      break;
    }
    case Inst::Load:
      lowerLoad(llvm::cast<InstLoad>(Inst));
      break;
    case Inst::Phi:
      lowerPhi(llvm::cast<InstPhi>(Inst));
      break;
    case Inst::Ret:
      lowerRet(llvm::cast<InstRet>(Inst));
      break;
    case Inst::Select:
      lowerSelect(llvm::cast<InstSelect>(Inst));
      break;
    case Inst::Store:
      lowerStore(llvm::cast<InstStore>(Inst));
      break;
    case Inst::Switch:
      lowerSwitch(llvm::cast<InstSwitch>(Inst));
      break;
    case Inst::Unreachable:
      lowerUnreachable(llvm::cast<InstUnreachable>(Inst));
      break;
    default:
      lowerOther(Inst);
      break;
    }

    postLower();
  }

  Context.advanceCur();
  Context.advanceNext();
}

void TargetLowering::lowerInst(CfgNode *Node, InstList::iterator Next,
                               InstHighLevel *Instr) {
  // TODO(stichnot): Consider modifying the design/implementation to avoid
  // multiple init() calls when using lowerInst() to lower several instructions
  // in the same node.
  Context.init(Node);
  Context.setNext(Next);
  Context.insert(Instr);
  --Next;
  assert(&*Next == Instr);
  Context.setCur(Next);
  lower();
}

void TargetLowering::lowerOther(const Inst *Instr) {
  (void)Instr;
  Func->setError("Can't lower unsupported instruction type");
}

// Drives register allocation, allowing all physical registers (except perhaps
// for the frame pointer) to be allocated. This set of registers could
// potentially be parameterized if we want to restrict registers e.g. for
// performance testing.
void TargetLowering::regAlloc(RegAllocKind Kind) {
  TimerMarker T(TimerStack::TT_regAlloc, Func);
  LinearScan LinearScan(Func);
  RegSetMask RegInclude = RegSet_None;
  RegSetMask RegExclude = RegSet_None;
  RegInclude |= RegSet_CallerSave;
  RegInclude |= RegSet_CalleeSave;
  if (hasFramePointer())
    RegExclude |= RegSet_FramePointer;
  llvm::SmallBitVector RegMask = getRegisterSet(RegInclude, RegExclude);
  bool Repeat = (Kind == RAK_Global && Ctx->getFlags().shouldRepeatRegAlloc());
  do {
    LinearScan.init(Kind);
    LinearScan.scan(RegMask, Ctx->getFlags().shouldRandomizeRegAlloc());
    if (!LinearScan.hasEvictions())
      Repeat = false;
    Kind = RAK_SecondChance;
  } while (Repeat);
}

void TargetLowering::markRedefinitions() {
  // Find (non-SSA) instructions where the Dest variable appears in some source
  // operand, and set the IsDestRedefined flag to keep liveness analysis
  // consistent.
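  // For example (illustrative only), a two-operand lowering such as
  // "t = add t, 1" uses t both as Dest and as a source operand; flagging the
  // redefinition tells liveness analysis not to treat it as a fresh
  // definition of t.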
  for (auto Inst = Context.getCur(), E = Context.getNext(); Inst != E; ++Inst) {
    if (Inst->isDeleted())
      continue;
    Variable *Dest = Inst->getDest();
    if (Dest == nullptr)
      continue;
    FOREACH_VAR_IN_INST(Var, *Inst) {
      if (Var == Dest)
        Inst->setDestRedefined();
    }
  }
}

void TargetLowering::sortVarsByAlignment(VarList &Dest,
                                         const VarList &Source) const {
  Dest = Source;
  // Instead of std::sort, we could do a bucket sort with log2(alignment) as
  // the buckets, if performance is an issue.
  std::sort(Dest.begin(), Dest.end(),
            [this](const Variable *V1, const Variable *V2) {
              return typeWidthInBytesOnStack(V1->getType()) >
                     typeWidthInBytesOnStack(V2->getType());
            });
}
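
// Collects the variables that need stack (spill) slots and the sizes and
// alignments of the spill areas. It walks every instruction to find variables
// that are actually referenced, skips variables that live in registers, are
// arguments, or are claimed by the target-specific TargetVarHook, and then
// accumulates the globals area size, the per-block locals area sizes, and the
// alignment requirements of each area.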
void TargetLowering::getVarStackSlotParams(
    VarList &SortedSpilledVariables, llvm::SmallBitVector &RegsUsed,
    size_t *GlobalsSize, size_t *SpillAreaSizeBytes,
    uint32_t *SpillAreaAlignmentBytes, uint32_t *LocalsSlotsAlignmentBytes,
    std::function<bool(Variable *)> TargetVarHook) {
  const VariablesMetadata *VMetadata = Func->getVMetadata();
  llvm::BitVector IsVarReferenced(Func->getNumVariables());
  for (CfgNode *Node : Func->getNodes()) {
    for (Inst &Inst : Node->getInsts()) {
      if (Inst.isDeleted())
        continue;
      if (const Variable *Var = Inst.getDest())
        IsVarReferenced[Var->getIndex()] = true;
      FOREACH_VAR_IN_INST(Var, Inst) {
        IsVarReferenced[Var->getIndex()] = true;
      }
    }
  }

  // If SimpleCoalescing is false, each variable without a register gets its
  // own unique stack slot, which leads to large stack frames. If
  // SimpleCoalescing is true, then each "global" variable without a register
  // gets its own slot, but "local" variable slots are reused across basic
  // blocks. E.g., if A and B are local to block 1 and C is local to block 2,
  // then C may share a slot with A or B.
  //
  // We cannot coalesce stack slots if this function calls a "returns twice"
  // function. In that case, basic blocks may be revisited, and variables local
  // to those basic blocks are actually live until after the called function
  // returns a second time.
  const bool SimpleCoalescing = !callsReturnsTwice();

  std::vector<size_t> LocalsSize(Func->getNumNodes());
  const VarList &Variables = Func->getVariables();
  VarList SpilledVariables;
  for (Variable *Var : Variables) {
    if (Var->hasReg()) {
      RegsUsed[Var->getRegNum()] = true;
      continue;
    }
    // An argument either does not need a stack slot (if passed in a register)
    // or already has one (if passed on the stack).
    if (Var->getIsArg())
      continue;
    // An unreferenced variable doesn't need a stack slot.
    if (!IsVarReferenced[Var->getIndex()])
      continue;
    // A target-specific variable (as determined by TargetVarHook) may end up
    // sharing a stack slot with another variable and does not need separate
    // accounting here.
    if (TargetVarHook(Var))
      continue;
    SpilledVariables.push_back(Var);
  }

  SortedSpilledVariables.reserve(SpilledVariables.size());
  sortVarsByAlignment(SortedSpilledVariables, SpilledVariables);

  for (Variable *Var : SortedSpilledVariables) {
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    // We have sorted by alignment, so the first variable we encounter that is
    // located in each area determines the max alignment for the area.
    if (!*SpillAreaAlignmentBytes)
      *SpillAreaAlignmentBytes = Increment;
    if (SimpleCoalescing && VMetadata->isTracked(Var)) {
      if (VMetadata->isMultiBlock(Var)) {
        *GlobalsSize += Increment;
      } else {
        SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
        LocalsSize[NodeIndex] += Increment;
        if (LocalsSize[NodeIndex] > *SpillAreaSizeBytes)
          *SpillAreaSizeBytes = LocalsSize[NodeIndex];
        if (!*LocalsSlotsAlignmentBytes)
          *LocalsSlotsAlignmentBytes = Increment;
      }
    } else {
      *SpillAreaSizeBytes += Increment;
    }
  }
  // For testing legalization of large stack offsets on targets with limited
  // offset bits in instruction encodings, add some padding.
  *SpillAreaSizeBytes += Ctx->getFlags().getTestStackExtra();
}
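
// Computes the padding needed to bring the spill areas up to their required
// alignments: SpillAreaPaddingBytes aligns the start of the whole spill area,
// and LocalsSlotsPaddingBytes pads the end of the globals area so that the
// locals area following it starts aligned.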
void TargetLowering::alignStackSpillAreas(uint32_t SpillAreaStartOffset,
                                          uint32_t SpillAreaAlignmentBytes,
                                          size_t GlobalsSize,
                                          uint32_t LocalsSlotsAlignmentBytes,
                                          uint32_t *SpillAreaPaddingBytes,
                                          uint32_t *LocalsSlotsPaddingBytes) {
  if (SpillAreaAlignmentBytes) {
    uint32_t PaddingStart = SpillAreaStartOffset;
    uint32_t SpillAreaStart =
        Utils::applyAlignment(PaddingStart, SpillAreaAlignmentBytes);
    *SpillAreaPaddingBytes = SpillAreaStart - PaddingStart;
  }

  // If there are separate globals and locals areas, make sure the locals area
  // is aligned by padding the end of the globals area.
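  // For instance (illustrative numbers): if the globals area ends at byte 28
  // and the locals slots need 16-byte alignment, applyAlignment(28, 16) == 32,
  // so 4 bytes of padding are recorded in LocalsSlotsPaddingBytes.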
  if (LocalsSlotsAlignmentBytes) {
    uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize;
    GlobalsAndSubsequentPaddingSize =
        Utils::applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes);
    *LocalsSlotsPaddingBytes = GlobalsAndSubsequentPaddingSize - GlobalsSize;
  }
}
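
// Assigns each sorted spilled variable a concrete stack offset. Multi-block
// ("global") variables get dedicated slots in the globals area, while
// single-block ("local") variables reuse the per-node locals area when
// SimpleCoalescing is in effect. Offsets are negated when a frame pointer is
// used, and otherwise expressed relative to the total spill-area size.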
void TargetLowering::assignVarStackSlots(VarList &SortedSpilledVariables,
                                         size_t SpillAreaPaddingBytes,
                                         size_t SpillAreaSizeBytes,
                                         size_t GlobalsAndSubsequentPaddingSize,
                                         bool UsesFramePointer) {
  const VariablesMetadata *VMetadata = Func->getVMetadata();
  // For testing legalization of large stack offsets on targets with limited
  // offset bits in instruction encodings, add some padding. This assumes that
  // SpillAreaSizeBytes has accounted for the extra test padding. When
  // UsesFramePointer is true, the offset depends on the padding, not just the
  // SpillAreaSizeBytes. On the other hand, when UsesFramePointer is false, the
  // offsets depend on the gap between SpillAreaSizeBytes and
  // SpillAreaPaddingBytes, so the padding is not incremented in that case.
  size_t TestPadding = Ctx->getFlags().getTestStackExtra();
  if (UsesFramePointer)
    SpillAreaPaddingBytes += TestPadding;
  size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
  size_t NextStackOffset = SpillAreaPaddingBytes;
  std::vector<size_t> LocalsSize(Func->getNumNodes());
  const bool SimpleCoalescing = !callsReturnsTwice();

  for (Variable *Var : SortedSpilledVariables) {
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing && VMetadata->isTracked(Var)) {
      if (VMetadata->isMultiBlock(Var)) {
        GlobalsSpaceUsed += Increment;
        NextStackOffset = GlobalsSpaceUsed;
      } else {
        SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
        LocalsSize[NodeIndex] += Increment;
        NextStackOffset = SpillAreaPaddingBytes +
                          GlobalsAndSubsequentPaddingSize +
                          LocalsSize[NodeIndex];
      }
    } else {
      NextStackOffset += Increment;
    }
    if (UsesFramePointer)
      Var->setStackOffset(-NextStackOffset);
    else
      Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
  }
}
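
// Builds a call instruction that targets an external helper symbol; the caller
// fills in up to MaxSrcs arguments and then lowers it like an ordinary call.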
InstCall *TargetLowering::makeHelperCall(const IceString &Name, Variable *Dest,
                                         SizeT MaxSrcs) {
  constexpr bool HasTailCall = false;
  Constant *CallTarget = Ctx->getConstantExternSym(Name);
  InstCall *Call =
      InstCall::create(Func, MaxSrcs, Dest, CallTarget, HasTailCall);
  return Call;
}

bool TargetLowering::shouldOptimizeMemIntrins() {
  return Ctx->getFlags().getOptLevel() >= Opt_1 ||
         Ctx->getFlags().getForceMemIntrinOpt();
}
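
// Emits a relocatable constant's symbol name (mangled unless mangling is
// suppressed) and any nonzero byte offset, without the target-specific
// constant prefix; emit() below prepends that prefix.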
void TargetLowering::emitWithoutPrefix(const ConstantRelocatable *C) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  if (C->getSuppressMangling())
    Str << C->getName();
  else
    Str << Ctx->mangleName(C->getName());
  RelocOffsetT Offset = C->getOffset();
  if (Offset) {
    if (Offset > 0)
      Str << "+";
    Str << Offset;
  }
}

void TargetLowering::emit(const ConstantRelocatable *C) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  Str << getConstantPrefix();
  emitWithoutPrefix(C);
}

std::unique_ptr<TargetDataLowering>
TargetDataLowering::createLowering(GlobalContext *Ctx) {
  TargetArch Target = Ctx->getFlags().getTargetArch();
#define SUBZERO_TARGET(X)                                                      \
  if (Target == Target_##X)                                                    \
    return TargetData##X::create(Ctx);
#include "llvm/Config/SZTargets.def"

  llvm::report_fatal_error("Unsupported target data lowering");
}

TargetDataLowering::~TargetDataLowering() = default;

namespace {

// dataSectionSuffix decides whether to use SectionSuffix or MangledVarName as
// the data section suffix. Essentially, when using separate data sections for
// globals, SectionSuffix is not necessary.
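//
// For example (names are only illustrative): with data sections enabled, a
// global mangled as "foo" is placed in a section such as ".data.foo"; with
// data sections disabled and SectionSuffix == "mysuffix", it goes in
// ".data.mysuffix"; if neither applies, the suffix is empty and the plain
// ".data"/".rodata"/".bss" section is used.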
IceString dataSectionSuffix(const IceString &SectionSuffix,
                            const IceString &MangledVarName,
                            const bool DataSections) {
  if (SectionSuffix.empty() && !DataSections) {
    return "";
  }

  if (DataSections) {
    // With data sections we don't need to use the SectionSuffix.
    return "." + MangledVarName;
  }

  assert(!SectionSuffix.empty());
  return "." + SectionSuffix;
}

} // end of anonymous namespace
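
// Emits one global variable as a sequence of assembly directives. As a rough
// sketch (the name "foo" is only illustrative), a 4-byte constant global
// aligned to 4 bytes comes out as:
//   .type foo,%object
//   .section .rodata.foo,"a",%progbits
//   .p2align 2
// foo:
//   .byte ...   (or ".zero 4" when zero-initialized)
//   .size foo, 4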
void TargetDataLowering::emitGlobal(const VariableDeclaration &Var,
                                    const IceString &SectionSuffix) {
  if (!BuildDefs::dump())
    return;

  // If external and not initialized, this must be a cross test. Don't generate
  // a declaration for such cases.
  const bool IsExternal =
      Var.isExternal() || Ctx->getFlags().getDisableInternal();
  if (IsExternal && !Var.hasInitializer())
    return;

  Ostream &Str = Ctx->getStrEmit();
  const bool HasNonzeroInitializer = Var.hasNonzeroInitializer();
  const bool IsConstant = Var.getIsConstant();
  const SizeT Size = Var.getNumBytes();
  const IceString MangledName = Var.mangleName(Ctx);

  Str << "\t.type\t" << MangledName << ",%object\n";

  const bool UseDataSections = Ctx->getFlags().getDataSections();
  const IceString Suffix =
      dataSectionSuffix(SectionSuffix, MangledName, UseDataSections);
  if (IsConstant)
    Str << "\t.section\t.rodata" << Suffix << ",\"a\",%progbits\n";
  else if (HasNonzeroInitializer)
    Str << "\t.section\t.data" << Suffix << ",\"aw\",%progbits\n";
  else
    Str << "\t.section\t.bss" << Suffix << ",\"aw\",%nobits\n";

  if (IsExternal)
    Str << "\t.globl\t" << MangledName << "\n";

  const uint32_t Align = Var.getAlignment();
  if (Align > 1) {
    assert(llvm::isPowerOf2_32(Align));
    // Use the .p2align directive, since the .align N directive can either
    // interpret N as bytes, or power of 2 bytes, depending on the target.
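    // For example, Align == 16 emits ".p2align 4", i.e. 2^4 == 16 bytes.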
    Str << "\t.p2align\t" << llvm::Log2_32(Align) << "\n";
  }

  Str << MangledName << ":\n";

  if (HasNonzeroInitializer) {
    for (const std::unique_ptr<VariableDeclaration::Initializer> &Init :
         Var.getInitializers()) {
      switch (Init->getKind()) {
      case VariableDeclaration::Initializer::DataInitializerKind: {
        const auto &Data =
            llvm::cast<VariableDeclaration::DataInitializer>(Init.get())
                ->getContents();
        for (SizeT i = 0; i < Init->getNumBytes(); ++i) {
          Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
        }
        break;
      }
      case VariableDeclaration::Initializer::ZeroInitializerKind:
        Str << "\t.zero\t" << Init->getNumBytes() << "\n";
        break;
      case VariableDeclaration::Initializer::RelocInitializerKind: {
        const auto *Reloc =
            llvm::cast<VariableDeclaration::RelocInitializer>(Init.get());
        Str << "\t" << getEmit32Directive() << "\t";
        Str << Reloc->getDeclaration()->mangleName(Ctx);
        if (RelocOffsetT Offset = Reloc->getOffset()) {
          if (Offset >= 0 || (Offset == INT32_MIN))
            Str << " + " << Offset;
          else
            Str << " - " << -Offset;
        }
        Str << "\n";
        break;
      }
      }
    }
  } else {
    // NOTE: for non-constant zero initializers, this is BSS (no bits), so an
    // ELF writer would not write to the file, and only track virtual offsets,
    // but the .s writer still needs this .zero and cannot simply use the .size
    // to advance offsets.
    Str << "\t.zero\t" << Size << "\n";
  }

  Str << "\t.size\t" << MangledName << ", " << Size << "\n";
}

std::unique_ptr<TargetHeaderLowering>
TargetHeaderLowering::createLowering(GlobalContext *Ctx) {
  TargetArch Target = Ctx->getFlags().getTargetArch();
#define SUBZERO_TARGET(X)                                                      \
  if (Target == Target_##X)                                                    \
    return TargetHeader##X::create(Ctx);
#include "llvm/Config/SZTargets.def"

  llvm::report_fatal_error("Unsupported target header lowering");
}

TargetHeaderLowering::~TargetHeaderLowering() = default;

} // end of namespace Ice