1 //===- subzero/src/IceTargetLowering.h - Lowering interface -----*- C++ -*-===//
3 // The Subzero Code Generator
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief Declares the TargetLowering, LoweringContext, and TargetDataLowering
14 /// TargetLowering is an abstract class used to drive the translation/lowering
15 /// process. LoweringContext maintains a context for lowering each instruction,
16 /// offering conveniences such as iterating over non-deleted instructions.
17 /// TargetDataLowering is an abstract class used to drive the lowering/emission
18 /// of global initializers, external global declarations, and internal constant
21 //===----------------------------------------------------------------------===//
23 #ifndef SUBZERO_SRC_ICETARGETLOWERING_H
24 #define SUBZERO_SRC_ICETARGETLOWERING_H
26 #include "IceCfgNode.h"
28 #include "IceInst.h" // for the names of the Inst subtypes
29 #include "IceOperand.h"
36 // UnimplementedError is defined as a macro so that we can get actual line
// numbers reported at the point of use. The error is only raised when the
// SkipUnimplemented flag is unset; with the flag set, translation is allowed
// to proceed past the unimplemented construct.
38 #define UnimplementedError(Flags) \
40   if (!static_cast<const ClFlags &>(Flags).getSkipUnimplemented()) { \
41     /* Use llvm_unreachable instead of report_fatal_error, which gives \
42        better stack traces. */ \
43     llvm_unreachable("Not yet implemented"); \
48 // UnimplementedLoweringError is similar in style to UnimplementedError. Given
49 // a TargetLowering object pointer and an Inst pointer, it adds appropriate
50 // FakeDef and FakeUse instructions to try to maintain liveness consistency.
// When SkipUnimplemented is set, the fake defs/uses are inserted and
// translation continues; otherwise the macro aborts with the instruction's
// name included in the "Not yet implemented" message.
51 #define UnimplementedLoweringError(Target, Instr) \
53   if ((Target)->Ctx->getFlags().getSkipUnimplemented()) { \
54     (Target)->addFakeDefUses(Instr); \
56     /* Use llvm_unreachable instead of report_fatal_error, which gives \
57        better stack traces. */ \
59     ("Not yet implemented: " + Instr->getInstName()).c_str()); \
64 /// LoweringContext makes it easy to iterate through non-deleted instructions in
65 /// a node, and insert new (lowered) instructions at the current point. Along
66 /// with the instruction list container and associated iterators, it holds the
67 /// current node, which is needed when inserting new instructions in order to
68 /// track whether variables are used as single-block or multi-block.
69 class LoweringContext {
70   LoweringContext(const LoweringContext &) = delete;
71   LoweringContext &operator=(const LoweringContext &) = delete;
74   LoweringContext() = default;
75   ~LoweringContext() = default;
76   void init(CfgNode *Node);
77   Inst *getNextInst() const {
82   Inst *getNextInst(InstList::iterator &Iter) const {
88   CfgNode *getNode() const { return Node; }
89   bool atEnd() const { return Cur == End; }
90   InstList::iterator getCur() const { return Cur; }
91   InstList::iterator getNext() const { return Next; }
92   InstList::iterator getEnd() const { return End; }
93   void insert(Inst *Inst);
94   template <typename Inst, typename... Args> Inst *insert(Args &&... A) {
95     auto *New = Inst::create(Node->getCfg(), std::forward<Args>(A)...);
99   Inst *getLastInserted() const;
100   void advanceCur() { Cur = Next; }
101   void advanceNext() { advanceForward(Next); }
102   void setCur(InstList::iterator C) { Cur = C; }
103   void setNext(InstList::iterator N) { Next = N; }
105   void setInsertPoint(const InstList::iterator &Position) { Next = Position; }
/// The availability methods implement the simple availability analysis over
/// the last inserted "Dest=Src" assignment (see the LastDest/LastSrc members
/// below).
106   void availabilityReset();
107   void availabilityUpdate();
108   Variable *availabilityGet(Operand *Src) const;
111   /// Node is the argument to Inst::updateVars().
112   CfgNode *Node = nullptr;
/// Most recently inserted instruction, returned by getLastInserted().
113   Inst *LastInserted = nullptr;
114   /// Cur points to the current instruction being considered. It is guaranteed
115   /// to point to a non-deleted instruction, or to be End.
116   InstList::iterator Cur;
117   /// Next doubles as a pointer to the next valid instruction (if any), and the
118   /// new-instruction insertion point. It is also updated for the caller in case
119   /// the lowering consumes more than one high-level instruction. It is
120   /// guaranteed to point to a non-deleted instruction after Cur, or to be End.
121   // TODO: Consider separating the notion of "next valid instruction" and "new
122   // instruction insertion point", to avoid confusion when previously-deleted
123   // instructions come between the two points.
124   InstList::iterator Next;
125   /// Begin is a copy of Insts.begin(), used if iterators are moved backward.
126   InstList::iterator Begin;
127   /// End is a copy of Insts.end(), used if Next needs to be advanced.
128   InstList::iterator End;
129   /// LastDest and LastSrc capture the parameters of the last "Dest=Src" simple
130   /// assignment inserted (provided Src is a variable). This is used for simple
131   /// availability analysis.
132   Variable *LastDest = nullptr;
133   Variable *LastSrc = nullptr;
135   void skipDeleted(InstList::iterator &I) const;
136   void advanceForward(InstList::iterator &I) const;
139 /// A helper class to advance the LoweringContext at each loop iteration.
/// The advancing happens in the destructor: constructing one at the top of a
/// loop body calls Context.advanceCur()/advanceNext() when the iteration's
/// scope ends, even on early exits from the body.
140 class PostIncrLoweringContext {
141   PostIncrLoweringContext() = delete;
142   PostIncrLoweringContext(const PostIncrLoweringContext &) = delete;
143   PostIncrLoweringContext &operator=(const PostIncrLoweringContext &) = delete;
146   explicit PostIncrLoweringContext(LoweringContext &Context)
147       : Context(Context) {}
148   ~PostIncrLoweringContext() {
149     Context.advanceCur();
150     Context.advanceNext();
154   LoweringContext &Context;
157 /// TargetLowering is the base class for all backends in Subzero. In addition to
158 /// implementing the abstract methods in this class, each concrete target must
159 /// also implement a named constructor in its own namespace. For instance, for
162 /// namespace X8632 {
163 /// void createTargetLowering(Cfg *Func);
165 class TargetLowering {
166   TargetLowering() = delete;
167   TargetLowering(const TargetLowering &) = delete;
168   TargetLowering &operator=(const TargetLowering &) = delete;
171   static void staticInit(GlobalContext *Ctx);
172   // Each target must define a public static method:
173   //   static void staticInit(GlobalContext *Ctx);
175   static std::unique_ptr<TargetLowering> createLowering(TargetArch Target,
178   virtual std::unique_ptr<Assembler> createAssembler() const = 0;
181     switch (Ctx->getFlags().getOptLevel()) {
196   virtual void translateOm1() {
197     Func->setError("Target doesn't specify Om1 lowering steps.");
199   virtual void translateO0() {
200     Func->setError("Target doesn't specify O0 lowering steps.");
202   virtual void translateO1() {
203     Func->setError("Target doesn't specify O1 lowering steps.");
205   virtual void translateO2() {
206     Func->setError("Target doesn't specify O2 lowering steps.");
209   /// Generates calls to intrinsics for operations the Target can't handle.
210   void genTargetHelperCalls();
211   /// Tries to do address mode optimization on a single instruction.
213   /// Randomly insert NOPs.
214   void doNopInsertion(RandomNumberGenerator &RNG);
215   /// Lowers a single non-Phi instruction.
217   /// Inserts and lowers a single high-level instruction at a specific insertion
219   void lowerInst(CfgNode *Node, InstList::iterator Next, InstHighLevel *Instr);
220   /// Does preliminary lowering of the set of Phi instructions in the current
221   /// node. The main intention is to do what's needed to keep the unlowered Phi
222   /// instructions consistent with the lowered non-Phi instructions, e.g. to
223   /// lower 64-bit operands on a 32-bit target.
224   virtual void prelowerPhis() {}
225   /// Tries to do branch optimization on a single instruction. Returns true if
226   /// some optimization was done.
227   virtual bool doBranchOpt(Inst * /*I*/, const CfgNode * /*NextNode*/) {
231   virtual SizeT getNumRegisters() const = 0;
232   /// Returns a variable pre-colored to the specified physical register. This is
233   /// generally used to get very direct access to the register such as in the
234   /// prolog or epilog or for marking scratch registers as killed by a call. If
235   /// a Type is not provided, a target-specific default type is used.
236   virtual Variable *getPhysicalRegister(SizeT RegNum,
237                                         Type Ty = IceType_void) = 0;
238   /// Returns a printable name for the register.
239   virtual IceString getRegName(SizeT RegNum, Type Ty) const = 0;
241   virtual bool hasFramePointer() const { return false; }
242   virtual void setHasFramePointer() = 0;
243   virtual SizeT getStackReg() const = 0;
244   virtual SizeT getFrameReg() const = 0;
245   virtual SizeT getFrameOrStackReg() const = 0;
246   virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0;
247   virtual uint32_t getStackAlignment() const = 0;
248   virtual void reserveFixedAllocaArea(size_t Size, size_t Align) = 0;
249   virtual int32_t getFrameFixedAllocaOffset() const = 0;
250   virtual uint32_t maxOutArgsSizeBytes() const { return 0; }
252   /// Return whether a 64-bit Variable should be split into a Variable64On32.
253   virtual bool shouldSplitToVariable64On32(Type Ty) const = 0;
255   bool hasComputedFrame() const { return HasComputedFrame; }
256   /// Returns true if this function calls a function that has the "returns
257   /// twice" attribute.
258   bool callsReturnsTwice() const { return CallsReturnsTwice; }
259   void setCallsReturnsTwice(bool RetTwice) { CallsReturnsTwice = RetTwice; }
260   SizeT makeNextLabelNumber() { return NextLabelNumber++; }
261   SizeT makeNextJumpTableNumber() { return NextJumpTableNumber++; }
262   LoweringContext &getContext() { return Context; }
263   Cfg *getFunc() const { return Func; }
264   GlobalContext *getGlobalContext() const { return Ctx; }
268     RegSet_CallerSave = 1 << 0,
269     RegSet_CalleeSave = 1 << 1,
270     RegSet_StackPointer = 1 << 2,
271     RegSet_FramePointer = 1 << 3,
272     RegSet_All = ~RegSet_None
274   using RegSetMask = uint32_t;
276   virtual llvm::SmallBitVector getRegisterSet(RegSetMask Include,
277                                               RegSetMask Exclude) const = 0;
278   /// Get the set of physical registers available for the specified Variable's
279   /// register class, applying register restrictions from the command line.
280   virtual const llvm::SmallBitVector &
281   getRegistersForVariable(const Variable *Var) const = 0;
282   /// Get the set of *all* physical registers available for the specified
283   /// Variable's register class, *not* applying register restrictions from the
285   virtual const llvm::SmallBitVector &
286   getAllRegistersForVariable(const Variable *Var) const = 0;
287   virtual const llvm::SmallBitVector &getAliasesForRegister(SizeT) const = 0;
289   void regAlloc(RegAllocKind Kind);
292   makeRandomRegisterPermutation(llvm::SmallVectorImpl<int32_t> &Permutation,
293                                 const llvm::SmallBitVector &ExcludeRegisters,
294                                 uint64_t Salt) const = 0;
296   /// Get the minimum number of clusters required for a jump table to be
298   virtual SizeT getMinJumpTableSize() const = 0;
299   virtual void emitJumpTable(const Cfg *Func,
300                              const InstJumpTable *JumpTable) const = 0;
302   virtual void emitVariable(const Variable *Var) const = 0;
304   void emitWithoutPrefix(const ConstantRelocatable *CR,
305                          const char *Suffix = "") const;
307   virtual void emit(const ConstantInteger32 *C) const = 0;
308   virtual void emit(const ConstantInteger64 *C) const = 0;
309   virtual void emit(const ConstantFloat *C) const = 0;
310   virtual void emit(const ConstantDouble *C) const = 0;
311   virtual void emit(const ConstantUndef *C) const = 0;
312   virtual void emit(const ConstantRelocatable *CR) const = 0;
314   /// Performs target-specific argument lowering.
315   virtual void lowerArguments() = 0;
317   virtual void initNodeForLowering(CfgNode *) {}
318   virtual void addProlog(CfgNode *Node) = 0;
319   virtual void addEpilog(CfgNode *Node) = 0;
321   virtual ~TargetLowering() = default;
324   // This control variable is used by AutoBundle (RAII-style bundle
325   // locking/unlocking) to prevent nested bundles.
326   bool AutoBundling = false;
328   // _bundle_lock(), and _bundle_unlock(), were made private to force subtargets
329   // to use the AutoBundle helper.
331   _bundle_lock(InstBundleLock::Option BundleOption = InstBundleLock::Opt_None) {
332     Context.insert<InstBundleLock>(BundleOption);
334   void _bundle_unlock() { Context.insert<InstBundleUnlock>(); }
337   /// AutoBundle provides RAII-style bundling. Sub-targets are expected to use
338   /// it when emitting NaCl Bundles to ensure proper bundle_unlocking, and
339   /// prevent nested bundles.
341   /// AutoBundle objects will emit a _bundle_lock during construction (but only
342   /// if sandboxed code generation was requested), and a bundle_unlock() during
343   /// destruction. By carefully scoping objects of this type, Subtargets can
344   /// ensure proper bundle emission.
346     AutoBundle() = delete;
347     AutoBundle(const AutoBundle &) = delete;
348     AutoBundle &operator=(const AutoBundle &) = delete;
351     explicit AutoBundle(TargetLowering *Target, InstBundleLock::Option Option =
352                                                     InstBundleLock::Opt_None);
356     TargetLowering *const Target;
357     const bool NeedSandboxing;
360   explicit TargetLowering(Cfg *Func);
361   // Applies command line filters to TypeToRegisterSet array.
363   filterTypeToRegisterSet(GlobalContext *Ctx, int32_t NumRegs,
364                           llvm::SmallBitVector TypeToRegisterSet[],
365                           size_t TypeToRegisterSetSize,
366                           std::function<IceString(int32_t)> getRegName,
367                           std::function<IceString(RegClass)> getRegClassName);
368   virtual void lowerAlloca(const InstAlloca *Inst) = 0;
369   virtual void lowerArithmetic(const InstArithmetic *Inst) = 0;
370   virtual void lowerAssign(const InstAssign *Inst) = 0;
371   virtual void lowerBr(const InstBr *Inst) = 0;
372   virtual void lowerCall(const InstCall *Inst) = 0;
373   virtual void lowerCast(const InstCast *Inst) = 0;
374   virtual void lowerFcmp(const InstFcmp *Inst) = 0;
375   virtual void lowerExtractElement(const InstExtractElement *Inst) = 0;
376   virtual void lowerIcmp(const InstIcmp *Inst) = 0;
377   virtual void lowerInsertElement(const InstInsertElement *Inst) = 0;
378   virtual void lowerIntrinsicCall(const InstIntrinsicCall *Inst) = 0;
379   virtual void lowerLoad(const InstLoad *Inst) = 0;
380   virtual void lowerPhi(const InstPhi *Inst) = 0;
381   virtual void lowerRet(const InstRet *Inst) = 0;
382   virtual void lowerSelect(const InstSelect *Inst) = 0;
383   virtual void lowerStore(const InstStore *Inst) = 0;
384   virtual void lowerSwitch(const InstSwitch *Inst) = 0;
385   virtual void lowerUnreachable(const InstUnreachable *Inst) = 0;
386   virtual void lowerOther(const Inst *Instr);
388   virtual void genTargetHelperCallFor(Inst *Instr) = 0;
389   virtual uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) = 0;
391   virtual void doAddressOptLoad() {}
392   virtual void doAddressOptStore() {}
393   virtual void doMockBoundsCheck(Operand *) {}
394   virtual void randomlyInsertNop(float Probability,
395                                  RandomNumberGenerator &RNG) = 0;
396   /// This gives the target an opportunity to post-process the lowered expansion
397   /// before returning.
398   virtual void postLower() {}
400   /// When the SkipUnimplemented flag is set, addFakeDefUses() gets invoked by
401   /// the UnimplementedLoweringError macro to insert fake uses of all the
402   /// instruction variables and a fake def of the instruction dest, in order to
403   /// preserve integrity of liveness analysis.
404   void addFakeDefUses(const Inst *Instr);
406   /// Find (non-SSA) instructions where the Dest variable appears in some source
407   /// operand, and set the IsDestRedefined flag. This keeps liveness analysis
409   void markRedefinitions();
411   /// Make a pass over the Cfg to determine which variables need stack slots and
412   /// place them in a sorted list (SortedSpilledVariables). Among those, vars,
413   /// classify the spill variables as local to the basic block vs global
414   /// (multi-block) in order to compute the parameters GlobalsSize and
415   /// SpillAreaSizeBytes (represents locals or general vars if the coalescing of
416   /// locals is disallowed) along with alignments required for variables in each
417   /// area. We rely on accurate VMetadata in order to classify a variable as
418   /// global vs local (otherwise the variable is conservatively global). The
419   /// in-args should be initialized to 0.
421   /// This is only a pre-pass and the actual stack slot assignment is handled
424   /// There may be target-specific Variable types, which will be handled by
425   /// TargetVarHook. If the TargetVarHook returns true, then the variable is
426   /// skipped and not considered with the rest of the spilled variables.
427   void getVarStackSlotParams(VarList &SortedSpilledVariables,
428                              llvm::SmallBitVector &RegsUsed,
429                              size_t *GlobalsSize, size_t *SpillAreaSizeBytes,
430                              uint32_t *SpillAreaAlignmentBytes,
431                              uint32_t *LocalsSlotsAlignmentBytes,
432                              std::function<bool(Variable *)> TargetVarHook);
434   /// Calculate the amount of padding needed to align the local and global areas
435   /// to the required alignment. This assumes the globals/locals layout used by
436   /// getVarStackSlotParams and assignVarStackSlots.
437   void alignStackSpillAreas(uint32_t SpillAreaStartOffset,
438                             uint32_t SpillAreaAlignmentBytes,
440                             uint32_t LocalsSlotsAlignmentBytes,
441                             uint32_t *SpillAreaPaddingBytes,
442                             uint32_t *LocalsSlotsPaddingBytes);
444   /// Make a pass through the SortedSpilledVariables and actually assign stack
445   /// slots. SpillAreaPaddingBytes takes into account stack alignment padding.
446   /// The SpillArea starts after that amount of padding. This matches the scheme
447   /// in getVarStackSlotParams, where there may be a separate multi-block global
448   /// var spill area and a local var spill area.
449   void assignVarStackSlots(VarList &SortedSpilledVariables,
450                            size_t SpillAreaPaddingBytes,
451                            size_t SpillAreaSizeBytes,
452                            size_t GlobalsAndSubsequentPaddingSize,
453                            bool UsesFramePointer);
455   /// Sort the variables in Source based on required alignment. The variables
456   /// with the largest alignment need are placed in the front of the Dest list.
457   void sortVarsByAlignment(VarList &Dest, const VarList &Source) const;
459   /// Make a call to an external helper function.
460   InstCall *makeHelperCall(const IceString &Name, Variable *Dest,
/// Mark the most recently inserted instruction's Dest as redefined (see
/// markRedefinitions()).
463   void _set_dest_redefined() { Context.getLastInserted()->setDestRedefined(); }
465   bool shouldOptimizeMemIntrins();
467   /// SandboxType enumerates all possible sandboxing strategies that
474   static SandboxType determineSandboxTypeFromFlags(const ClFlags &Flags);
478   bool HasComputedFrame = false;
479   bool CallsReturnsTwice = false;
480   SizeT NextLabelNumber = 0;
481   SizeT NextJumpTableNumber = 0;
482   LoweringContext Context;
483   const SandboxType SandboxingType = ST_None;
485   // Runtime helper function names
486   const static constexpr char *H_bitcast_16xi1_i16 = "__Sz_bitcast_16xi1_i16";
487   const static constexpr char *H_bitcast_8xi1_i8 = "__Sz_bitcast_8xi1_i8";
488   const static constexpr char *H_bitcast_i16_16xi1 = "__Sz_bitcast_i16_16xi1";
489   const static constexpr char *H_bitcast_i8_8xi1 = "__Sz_bitcast_i8_8xi1";
490   const static constexpr char *H_call_ctpop_i32 = "__popcountsi2";
491   const static constexpr char *H_call_ctpop_i64 = "__popcountdi2";
492   const static constexpr char *H_call_longjmp = "longjmp";
493   const static constexpr char *H_call_memcpy = "memcpy";
494   const static constexpr char *H_call_memmove = "memmove";
495   const static constexpr char *H_call_memset = "memset";
496   const static constexpr char *H_call_read_tp = "__nacl_read_tp";
497   const static constexpr char *H_call_setjmp = "setjmp";
498   const static constexpr char *H_fptosi_f32_i64 = "__Sz_fptosi_f32_i64";
499   const static constexpr char *H_fptosi_f64_i64 = "__Sz_fptosi_f64_i64";
500   const static constexpr char *H_fptoui_4xi32_f32 = "__Sz_fptoui_4xi32_f32";
501   const static constexpr char *H_fptoui_f32_i32 = "__Sz_fptoui_f32_i32";
502   const static constexpr char *H_fptoui_f32_i64 = "__Sz_fptoui_f32_i64";
503   const static constexpr char *H_fptoui_f64_i32 = "__Sz_fptoui_f64_i32";
504   const static constexpr char *H_fptoui_f64_i64 = "__Sz_fptoui_f64_i64";
505   const static constexpr char *H_frem_f32 = "fmodf";
506   const static constexpr char *H_frem_f64 = "fmod";
507   const static constexpr char *H_getIP_prefix = "__Sz_getIP_";
508   const static constexpr char *H_sdiv_i32 = "__divsi3";
509   const static constexpr char *H_sdiv_i64 = "__divdi3";
510   const static constexpr char *H_sitofp_i64_f32 = "__Sz_sitofp_i64_f32";
511   const static constexpr char *H_sitofp_i64_f64 = "__Sz_sitofp_i64_f64";
512   const static constexpr char *H_srem_i32 = "__modsi3";
513   const static constexpr char *H_srem_i64 = "__moddi3";
514   const static constexpr char *H_udiv_i32 = "__udivsi3";
515   const static constexpr char *H_udiv_i64 = "__udivdi3";
516   const static constexpr char *H_uitofp_4xi32_4xf32 = "__Sz_uitofp_4xi32_4xf32";
517   const static constexpr char *H_uitofp_i32_f32 = "__Sz_uitofp_i32_f32";
518   const static constexpr char *H_uitofp_i32_f64 = "__Sz_uitofp_i32_f64";
519   const static constexpr char *H_uitofp_i64_f32 = "__Sz_uitofp_i64_f32";
520   const static constexpr char *H_uitofp_i64_f64 = "__Sz_uitofp_i64_f64";
521   const static constexpr char *H_urem_i32 = "__umodsi3";
522   const static constexpr char *H_urem_i64 = "__umoddi3";
525 /// TargetDataLowering is used for "lowering" data including initializers for
526 /// global variables, and the internal constant pools. It is separated out from
527 /// TargetLowering because it does not require a Cfg.
528 class TargetDataLowering {
529   TargetDataLowering() = delete;
530   TargetDataLowering(const TargetDataLowering &) = delete;
531   TargetDataLowering &operator=(const TargetDataLowering &) = delete;
534   static std::unique_ptr<TargetDataLowering> createLowering(GlobalContext *Ctx);
535   virtual ~TargetDataLowering();
537   virtual void lowerGlobals(const VariableDeclarationList &Vars,
538                             const IceString &SectionSuffix) = 0;
539   virtual void lowerConstants() = 0;
540   virtual void lowerJumpTables() = 0;
543   void emitGlobal(const VariableDeclaration &Var,
544                   const IceString &SectionSuffix);
546   /// For now, we assume .long is the right directive for emitting 4 byte
547   /// global relocations. However, LLVM MIPS usually uses .4byte instead.
548   /// Perhaps there is some difference when the location is unaligned.
549   static const char *getEmit32Directive() { return ".long"; }
551   explicit TargetDataLowering(GlobalContext *Ctx) : Ctx(Ctx) {}
555 /// TargetHeaderLowering is used to "lower" the header of an output file. It
556 /// writes out the target-specific header attributes. E.g., for ARM this writes
557 /// out the build attributes (float ABI, etc.).
558 class TargetHeaderLowering {
559   TargetHeaderLowering() = delete;
560   TargetHeaderLowering(const TargetHeaderLowering &) = delete;
561   TargetHeaderLowering &operator=(const TargetHeaderLowering &) = delete;
564   static std::unique_ptr<TargetHeaderLowering>
565   createLowering(GlobalContext *Ctx);
566   virtual ~TargetHeaderLowering();
/// Default is a no-op; targets override this to emit their header attributes.
568   virtual void lower() {}
571   explicit TargetHeaderLowering(GlobalContext *Ctx) : Ctx(Ctx) {}
575 } // end of namespace Ice
577 #endif // SUBZERO_SRC_ICETARGETLOWERING_H