third_party/subzero/src/IceTargetLowering.h

   1 //===- subzero/src/IceTargetLowering.h - Lowering interface -----*- C++ -*-===//
   2 //
   3 //                        The Subzero Code Generator
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 ///
  10 /// \file
  11 /// \brief Declares the TargetLowering, LoweringContext, and TargetDataLowering
  12 /// classes.
  13 ///
  14 /// TargetLowering is an abstract class used to drive the translation/lowering
  15 /// process. LoweringContext maintains a context for lowering each instruction,
  16 /// offering conveniences such as iterating over non-deleted instructions.
  17 /// TargetDataLowering is an abstract class used to drive the lowering/emission
  18 /// of global initializers, external global declarations, and internal constant
  19 /// pools.
  20 ///
  21 //===----------------------------------------------------------------------===//
  22
  23 #ifndef SUBZERO_SRC_ICETARGETLOWERING_H
  24 #define SUBZERO_SRC_ICETARGETLOWERING_H
  25
  26 #include "IceBitVector.h"
  27 #include "IceCfgNode.h"
  28 #include "IceDefs.h"
  29 #include "IceInst.h" // for the names of the Inst subtypes
  30 #include "IceOperand.h"
  31 #include "IceRegAlloc.h"
  32 #include "IceTypes.h"
  33
  34 #include <utility>
  35
  36 namespace Ice {
  37
  38 // UnimplementedError is defined as a macro so that we can get actual line
  39 // numbers.
  40 #define UnimplementedError(Flags)                                              \
  41   do {                                                                         \
  42     if (!static_cast<const ClFlags &>(Flags).getSkipUnimplemented()) {         \
  43       /* Use llvm_unreachable instead of report_fatal_error, which gives       \
  44          better stack traces. */                                               \
  45       llvm_unreachable("Not yet implemented");                                 \
  46       abort();                                                                 \
  47     }                                                                          \
  48   } while (0)
  49
  50 // UnimplementedLoweringError is similar in style to UnimplementedError.  Given
  51 // a TargetLowering object pointer and an Inst pointer, it adds appropriate
  52 // FakeDef and FakeUse instructions to try maintain liveness consistency.
  53 #define UnimplementedLoweringError(Target, Instr)                              \
  54   do {                                                                         \
  55     if (getFlags().getSkipUnimplemented()) {                                   \
  56       (Target)->addFakeDefUses(Instr);                                         \
  57     } else {                                                                   \
  58       /* Use llvm_unreachable instead of report_fatal_error, which gives       \
  59          better stack traces. */                                               \
  60       llvm_unreachable(                                                        \
  61           (std::string("Not yet implemented: ") + Instr->getInstName())        \
  62               .c_str());                                                       \
  63       abort();                                                                 \
  64     }                                                                          \
  65   } while (0)
  66
  67 /// LoweringContext makes it easy to iterate through non-deleted instructions in
  68 /// a node, and insert new (lowered) instructions at the current point. Along
  69 /// with the instruction list container and associated iterators, it holds the
  70 /// current node, which is needed when inserting new instructions in order to
  71 /// track whether variables are used as single-block or multi-block.
  72 class LoweringContext {
  73   LoweringContext(const LoweringContext &) = delete;
  74   LoweringContext &operator=(const LoweringContext &) = delete;
  75
  76 public:
  77   LoweringContext() = default;
  78   ~LoweringContext() = default;
  79   void init(CfgNode *Node);
  80   Inst *getNextInst() const {
  81     if (Next == End)
  82       return nullptr;
  83     return iteratorToInst(Next);
  84   }
  85   Inst *getNextInst(InstList::iterator &Iter) const {
  86     advanceForward(Iter);
  87     if (Iter == End)
  88       return nullptr;
  89     return iteratorToInst(Iter);
  90   }
  91   CfgNode *getNode() const { return Node; }
  92   bool atEnd() const { return Cur == End; }
  93   InstList::iterator getCur() const { return Cur; }
  94   InstList::iterator getNext() const { return Next; }
  95   InstList::iterator getEnd() const { return End; }
  96   void insert(Inst *Instr);
  97   template <typename Inst, typename... Args> Inst *insert(Args &&... A) {
  98     auto *New = Inst::create(Node->getCfg(), std::forward<Args>(A)...);
  99     insert(New);
 100     return New;
 101   }
 102   Inst *getLastInserted() const;
 103   void advanceCur() { Cur = Next; }
 104   void advanceNext() { advanceForward(Next); }
 105   void setCur(InstList::iterator C) { Cur = C; }
 106   void setNext(InstList::iterator N) { Next = N; }
 107   void rewind();
 108   void setInsertPoint(const InstList::iterator &Position) { Next = Position; }
 109   void availabilityReset();
 110   void availabilityUpdate();
 111   Variable *availabilityGet(Operand *Src) const;
 112
 113 private:
 114   /// Node is the argument to Inst::updateVars().
 115   CfgNode *Node = nullptr;
 116   Inst *LastInserted = nullptr;
 117   /// Cur points to the current instruction being considered. It is guaranteed
 118   /// to point to a non-deleted instruction, or to be End.
 119   InstList::iterator Cur;
 120   /// Next doubles as a pointer to the next valid instruction (if any), and the
 121   /// new-instruction insertion point. It is also updated for the caller in case
 122   /// the lowering consumes more than one high-level instruction. It is
 123   /// guaranteed to point to a non-deleted instruction after Cur, or to be End.
 124   // TODO: Consider separating the notion of "next valid instruction" and "new
 125   // instruction insertion point", to avoid confusion when previously-deleted
 126   // instructions come between the two points.
 127   InstList::iterator Next;
 128   /// Begin is a copy of Insts.begin(), used if iterators are moved backward.
 129   InstList::iterator Begin;
 130   /// End is a copy of Insts.end(), used if Next needs to be advanced.
 131   InstList::iterator End;
 132   /// LastDest and LastSrc capture the parameters of the last "Dest=Src" simple
 133   /// assignment inserted (provided Src is a variable).  This is used for simple
 134   /// availability analysis.
 135   Variable *LastDest = nullptr;
 136   Variable *LastSrc = nullptr;
 137
 138   void skipDeleted(InstList::iterator &I) const;
 139   void advanceForward(InstList::iterator &I) const;
 140 };
 141
 142 /// A helper class to advance the LoweringContext at each loop iteration.
 143 class PostIncrLoweringContext {
 144   PostIncrLoweringContext() = delete;
 145   PostIncrLoweringContext(const PostIncrLoweringContext &) = delete;
 146   PostIncrLoweringContext &operator=(const PostIncrLoweringContext &) = delete;
 147
 148 public:
 149   explicit PostIncrLoweringContext(LoweringContext &Context)
 150       : Context(Context) {}
 151   ~PostIncrLoweringContext() {
 152     Context.advanceCur();
 153     Context.advanceNext();
 154   }
 155
 156 private:
 157   LoweringContext &Context;
 158 };
 159
/// TargetLowering is the base class for all backends in Subzero. In addition to
/// implementing the abstract methods in this class, each concrete target must
/// also implement a named constructor in its own namespace. For instance, for
/// X8632 we have:
///
///  namespace X8632 {
///    void createTargetLowering(Cfg *Func);
///  }
class TargetLowering {
  TargetLowering() = delete;
  TargetLowering(const TargetLowering &) = delete;
  TargetLowering &operator=(const TargetLowering &) = delete;

public:
  static void staticInit(GlobalContext *Ctx);
  // Each target must define a public static method:
  //   static void staticInit(GlobalContext *Ctx);
  static bool shouldBePooled(const class Constant *C);
  static Type getPointerType();

  static std::unique_ptr<TargetLowering> createLowering(TargetArch Target,
                                                        Cfg *Func);

  virtual std::unique_ptr<Assembler> createAssembler() const = 0;

  /// Dispatches to the translateO*() hook that matches the optimization level
  /// reported by Func.
  void translate() {
    switch (Func->getOptLevel()) {
    case Opt_m1:
      translateOm1();
      break;
    case Opt_0:
      translateO0();
      break;
    case Opt_1:
      translateO1();
      break;
    case Opt_2:
      translateO2();
      break;
    }
  }
  // The default translateO*() implementations below only record an error on
  // Func; a target overrides the ones for the optimization levels it supports.
  virtual void translateOm1() {
    Func->setError("Target doesn't specify Om1 lowering steps.");
  }
  virtual void translateO0() {
    Func->setError("Target doesn't specify O0 lowering steps.");
  }
  virtual void translateO1() {
    Func->setError("Target doesn't specify O1 lowering steps.");
  }
  virtual void translateO2() {
    Func->setError("Target doesn't specify O2 lowering steps.");
  }

  /// Generates calls to intrinsics for operations the Target can't handle.
  void genTargetHelperCalls();
  /// Tries to do address mode optimization on a single instruction.
  void doAddressOpt();
  /// Randomly insert NOPs.
  void doNopInsertion(RandomNumberGenerator &RNG);
  /// Lowers a single non-Phi instruction.
  void lower();
  /// Inserts and lowers a single high-level instruction at a specific insertion
  /// point.
  void lowerInst(CfgNode *Node, InstList::iterator Next, InstHighLevel *Instr);
  /// Does preliminary lowering of the set of Phi instructions in the current
  /// node. The main intention is to do what's needed to keep the unlowered Phi
  /// instructions consistent with the lowered non-Phi instructions, e.g. to
  /// lower 64-bit operands on a 32-bit target. The default does nothing.
  virtual void prelowerPhis() {}
  /// Tries to do branch optimization on a single instruction. Returns true if
  /// some optimization was done. The default does nothing and returns false.
  virtual bool doBranchOpt(Inst * /*I*/, const CfgNode * /*NextNode*/) {
    return false;
  }

  virtual SizeT getNumRegisters() const = 0;
  /// Returns a variable pre-colored to the specified physical register. This is
  /// generally used to get very direct access to the register such as in the
  /// prolog or epilog or for marking scratch registers as killed by a call. If
  /// a Type is not provided, a target-specific default type is used.
  virtual Variable *getPhysicalRegister(RegNumT RegNum,
                                        Type Ty = IceType_void) = 0;
  /// Returns a printable name for the register.
  virtual const char *getRegName(RegNumT RegNum, Type Ty) const = 0;

  /// The default reports that no frame pointer is in use.
  virtual bool hasFramePointer() const { return false; }
  virtual void setHasFramePointer() = 0;
  virtual RegNumT getStackReg() const = 0;
  virtual RegNumT getFrameReg() const = 0;
  virtual RegNumT getFrameOrStackReg() const = 0;
  virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0;
  virtual uint32_t getStackAlignment() const = 0;
  /// The default reports that no stack pointer alignment is needed.
  virtual bool needsStackPointerAlignment() const { return false; }
  virtual void reserveFixedAllocaArea(size_t Size, size_t Align) = 0;
  virtual int32_t getFrameFixedAllocaOffset() const = 0;
  /// The default reports an out-args area of 0 bytes.
  virtual uint32_t maxOutArgsSizeBytes() const { return 0; }
  // Addressing relative to frame pointer differs in MIPS compared to X86/ARM
  // since MIPS decrements its stack pointer prior to saving it in the frame
  // pointer register.
  //
  // The default returns the negation of (CurrentOffset + Size). The arithmetic
  // is done on uint32_t, so the result is the two's-complement (modulo 2^32)
  // representation of that negative offset.
  virtual uint32_t getFramePointerOffset(uint32_t CurrentOffset,
                                         uint32_t Size) const {
    return -(CurrentOffset + Size);
  }
  /// Return whether a 64-bit Variable should be split into a Variable64On32.
  virtual bool shouldSplitToVariable64On32(Type Ty) const = 0;

  /// Return whether a Vector Variable should be split into a VariableVecOn32.
  /// The default ignores Ty and returns false.
  virtual bool shouldSplitToVariableVecOn32(Type Ty) const {
    (void)Ty;
    return false;
  }

  bool hasComputedFrame() const { return HasComputedFrame; }
  /// Returns true if this function calls a function that has the "returns
  /// twice" attribute.
  bool callsReturnsTwice() const { return CallsReturnsTwice; }
  void setCallsReturnsTwice(bool RetTwice) { CallsReturnsTwice = RetTwice; }
  /// Returns the current label number and post-increments the counter.
  SizeT makeNextLabelNumber() { return NextLabelNumber++; }
  /// Returns the current jump table number and post-increments the counter.
  SizeT makeNextJumpTableNumber() { return NextJumpTableNumber++; }
  LoweringContext &getContext() { return Context; }
  Cfg *getFunc() const { return Func; }
  GlobalContext *getGlobalContext() const { return Ctx; }

  /// Single-bit flags naming categories of registers. They are combined into a
  /// RegSetMask for the Include/Exclude arguments of getRegisterSet().
  /// RegSet_All is the complement of RegSet_None, i.e. every bit set.
  enum RegSet {
    RegSet_None = 0,
    RegSet_CallerSave = 1 << 0,
    RegSet_CalleeSave = 1 << 1,
    RegSet_StackPointer = 1 << 2,
    RegSet_FramePointer = 1 << 3,
    RegSet_All = ~RegSet_None
  };
  using RegSetMask = uint32_t;

  virtual SmallBitVector getRegisterSet(RegSetMask Include,
                                        RegSetMask Exclude) const = 0;
  /// Get the set of physical registers available for the specified Variable's
  /// register class, applying register restrictions from the command line.
  virtual const SmallBitVector &
  getRegistersForVariable(const Variable *Var) const = 0;
  /// Get the set of *all* physical registers available for the specified
  /// Variable's register class, *not* applying register restrictions from the
  /// command line.
  virtual const SmallBitVector &
  getAllRegistersForVariable(const Variable *Var) const = 0;
  virtual const SmallBitVector &getAliasesForRegister(RegNumT) const = 0;

  void regAlloc(RegAllocKind Kind);
  void postRegallocSplitting(const SmallBitVector &RegMask);

  virtual void
  makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> &Permutation,
                                const SmallBitVector &ExcludeRegisters,
                                uint64_t Salt) const = 0;

  /// Get the minimum number of clusters required for a jump table to be
  /// considered.
  virtual SizeT getMinJumpTableSize() const = 0;
  virtual void emitJumpTable(const Cfg *Func,
                             const InstJumpTable *JumpTable) const = 0;

  virtual void emitVariable(const Variable *Var) const = 0;

  void emitWithoutPrefix(const ConstantRelocatable *CR,
                         const char *Suffix = "") const;

  // Target-specific emission, one overload per kind of constant.
  virtual void emit(const ConstantInteger32 *C) const = 0;
  virtual void emit(const ConstantInteger64 *C) const = 0;
  virtual void emit(const ConstantFloat *C) const = 0;
  virtual void emit(const ConstantDouble *C) const = 0;
  virtual void emit(const ConstantUndef *C) const = 0;
  virtual void emit(const ConstantRelocatable *CR) const = 0;

  /// Performs target-specific argument lowering.
  virtual void lowerArguments() = 0;

  /// Per-node hook; the default does nothing.
  virtual void initNodeForLowering(CfgNode *) {}
  virtual void addProlog(CfgNode *Node) = 0;
  virtual void addEpilog(CfgNode *Node) = 0;

  /// Create a properly-typed "mov" instruction.  This is primarily for local
  /// variable splitting. The default implementation reports a fatal error.
  virtual Inst *createLoweredMove(Variable *Dest, Variable *SrcVar) {
    // TODO(stichnot): make pure virtual by implementing for all targets
    (void)Dest;
    (void)SrcVar;
    llvm::report_fatal_error("createLoweredMove() unimplemented");
    return nullptr;
  }

  virtual ~TargetLowering() = default;

private:
  // This control variable is used by AutoBundle (RAII-style bundle
  // locking/unlocking) to prevent nested bundles.
  bool AutoBundling = false;

  /// This indicates whether we are in the genTargetHelperCalls phase, and
  /// therefore can do things like scalarization.
  bool GeneratingTargetHelpers = false;

  // _bundle_lock(), and _bundle_unlock(), were made private to force subtargets
  // to use the AutoBundle helper.
  void
  _bundle_lock(InstBundleLock::Option BundleOption = InstBundleLock::Opt_None) {
    Context.insert<InstBundleLock>(BundleOption);
  }
  void _bundle_unlock() { Context.insert<InstBundleUnlock>(); }

protected:
  /// AutoBundle provides RAII-style bundling. Sub-targets are expected to use
  /// it when emitting NaCl Bundles to ensure proper bundle_unlocking, and
  /// prevent nested bundles.
  ///
  /// AutoBundle objects will emit a _bundle_lock during construction (but only
  /// if sandboxed code generation was requested), and a bundle_unlock() during
  /// destruction. By carefully scoping objects of this type, Subtargets can
  /// ensure proper bundle emission.
  class AutoBundle {
    AutoBundle() = delete;
    AutoBundle(const AutoBundle &) = delete;
    AutoBundle &operator=(const AutoBundle &) = delete;

  public:
    explicit AutoBundle(TargetLowering *Target, InstBundleLock::Option Option =
                                                    InstBundleLock::Opt_None);
    ~AutoBundle();

  private:
    TargetLowering *const Target;
    const bool NeedSandboxing;
  };

  explicit TargetLowering(Cfg *Func);
  // Applies command line filters to TypeToRegisterSet array.
  static void filterTypeToRegisterSet(
      GlobalContext *Ctx, int32_t NumRegs, SmallBitVector TypeToRegisterSet[],
      size_t TypeToRegisterSetSize,
      std::function<std::string(RegNumT)> getRegName,
      std::function<const char *(RegClass)> getRegClassName);
  // Per-instruction lowering hooks, one per high-level instruction kind. Every
  // hook except lowerOther() must be implemented by the target.
  virtual void lowerAlloca(const InstAlloca *Instr) = 0;
  virtual void lowerArithmetic(const InstArithmetic *Instr) = 0;
  virtual void lowerAssign(const InstAssign *Instr) = 0;
  virtual void lowerBr(const InstBr *Instr) = 0;
  virtual void lowerBreakpoint(const InstBreakpoint *Instr) = 0;
  virtual void lowerCall(const InstCall *Instr) = 0;
  virtual void lowerCast(const InstCast *Instr) = 0;
  virtual void lowerFcmp(const InstFcmp *Instr) = 0;
  virtual void lowerExtractElement(const InstExtractElement *Instr) = 0;
  virtual void lowerIcmp(const InstIcmp *Instr) = 0;
  virtual void lowerInsertElement(const InstInsertElement *Instr) = 0;
  virtual void lowerIntrinsicCall(const InstIntrinsicCall *Instr) = 0;
  virtual void lowerLoad(const InstLoad *Instr) = 0;
  virtual void lowerPhi(const InstPhi *Instr) = 0;
  virtual void lowerRet(const InstRet *Instr) = 0;
  virtual void lowerSelect(const InstSelect *Instr) = 0;
  virtual void lowerShuffleVector(const InstShuffleVector *Instr) = 0;
  virtual void lowerStore(const InstStore *Instr) = 0;
  virtual void lowerSwitch(const InstSwitch *Instr) = 0;
  virtual void lowerUnreachable(const InstUnreachable *Instr) = 0;
  virtual void lowerOther(const Inst *Instr);

  virtual void genTargetHelperCallFor(Inst *Instr) = 0;
  virtual uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) = 0;

  /// Opportunity to modify other instructions to help Address Optimization
  // All of the doAddressOpt*() hooks and doMockBoundsCheck() default to doing
  // nothing.
  virtual void doAddressOptOther() {}
  virtual void doAddressOptLoad() {}
  virtual void doAddressOptStore() {}
  virtual void doAddressOptLoadSubVector() {}
  virtual void doAddressOptStoreSubVector() {}
  virtual void doMockBoundsCheck(Operand *) {}
  virtual void randomlyInsertNop(float Probability,
                                 RandomNumberGenerator &RNG) = 0;
  /// This gives the target an opportunity to post-process the lowered expansion
  /// before returning. The default does nothing.
  virtual void postLower() {}

  /// When the SkipUnimplemented flag is set, addFakeDefUses() gets invoked by
  /// the UnimplementedLoweringError macro to insert fake uses of all the
  /// instruction variables and a fake def of the instruction dest, in order to
  /// preserve integrity of liveness analysis.
  void addFakeDefUses(const Inst *Instr);

  /// Find (non-SSA) instructions where the Dest variable appears in some source
  /// operand, and set the IsDestRedefined flag.  This keeps liveness analysis
  /// consistent.
  void markRedefinitions();

  /// Make a pass over the Cfg to determine which variables need stack slots and
  /// place them in a sorted list (SortedSpilledVariables). Among those, vars,
  /// classify the spill variables as local to the basic block vs global
  /// (multi-block) in order to compute the parameters GlobalsSize and
  /// SpillAreaSizeBytes (represents locals or general vars if the coalescing of
  /// locals is disallowed) along with alignments required for variables in each
  /// area. We rely on accurate VMetadata in order to classify a variable as
  /// global vs local (otherwise the variable is conservatively global). The
  /// in-args should be initialized to 0.
  ///
  /// This is only a pre-pass and the actual stack slot assignment is handled
  /// separately.
  ///
  /// There may be target-specific Variable types, which will be handled by
  /// TargetVarHook. If the TargetVarHook returns true, then the variable is
  /// skipped and not considered with the rest of the spilled variables.
  void getVarStackSlotParams(VarList &SortedSpilledVariables,
                             SmallBitVector &RegsUsed, size_t *GlobalsSize,
                             size_t *SpillAreaSizeBytes,
                             uint32_t *SpillAreaAlignmentBytes,
                             uint32_t *LocalsSlotsAlignmentBytes,
                             std::function<bool(Variable *)> TargetVarHook);

  /// Calculate the amount of padding needed to align the local and global areas
  /// to the required alignment. This assumes the globals/locals layout used by
  /// getVarStackSlotParams and assignVarStackSlots.
  void alignStackSpillAreas(uint32_t SpillAreaStartOffset,
                            uint32_t SpillAreaAlignmentBytes,
                            size_t GlobalsSize,
                            uint32_t LocalsSlotsAlignmentBytes,
                            uint32_t *SpillAreaPaddingBytes,
                            uint32_t *LocalsSlotsPaddingBytes);

  /// Make a pass through the SortedSpilledVariables and actually assign stack
  /// slots. SpillAreaPaddingBytes takes into account stack alignment padding.
  /// The SpillArea starts after that amount of padding. This matches the scheme
  /// in getVarStackSlotParams, where there may be a separate multi-block global
  /// var spill area and a local var spill area.
  void assignVarStackSlots(VarList &SortedSpilledVariables,
                           size_t SpillAreaPaddingBytes,
                           size_t SpillAreaSizeBytes,
                           size_t GlobalsAndSubsequentPaddingSize,
                           bool UsesFramePointer);

  /// Sort the variables in Source based on required alignment. The variables
  /// with the largest alignment need are placed in the front of the Dest list.
  void sortVarsByAlignment(VarList &Dest, const VarList &Source) const;

  InstCall *makeHelperCall(RuntimeHelper FuncID, Variable *Dest, SizeT MaxSrcs);

  /// Marks the most recently inserted instruction (as reported by the lowering
  /// context) as redefining its Dest.
  void _set_dest_redefined() { Context.getLastInserted()->setDestRedefined(); }

  bool shouldOptimizeMemIntrins();

  void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest,
                           Operand *Src0, Operand *Src1);

  /// Generalizes scalarizeArithmetic to support other instruction types.
  ///
  /// insertScalarInstruction is a function-like object with signature
  /// (Variable *Dest, Variable *Src0, Variable *Src1) -> Instr *.
  ///
  /// The vector result is built one element at a time: for each element index,
  /// the corresponding element of every source is extracted, the scalar
  /// instruction is applied to those elements, and the scalar result is
  /// inserted into a fresh vector variable. The final vector is then assigned
  /// to Dest. May only be called while GeneratingTargetHelpers is set, and Dest
  /// must have a vector type (both are asserted).
  template <typename... Operands,
            typename F = std::function<Inst *(Variable *, Operands *...)>>
  void scalarizeInstruction(Variable *Dest, F insertScalarInstruction,
                            Operands *... Srcs) {
    assert(GeneratingTargetHelpers &&
           "scalarizeInstruction called during incorrect phase");
    const Type DestTy = Dest->getType();
    assert(isVectorType(DestTy));
    const Type DestElementTy = typeElementType(DestTy);
    const SizeT NumElements = typeNumElements(DestTy);

    // T is the vector accumulated so far. It is given an initial definition
    // before any element is inserted: an assignment from undef when it is a
    // VariableVecOn32, otherwise a fake def.
    Variable *T = Func->makeVariable(DestTy);
    if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(T)) {
      VarVecOn32->initVecElement(Func);
      auto *Undef = ConstantUndef::create(Ctx, DestTy);
      Context.insert<InstAssign>(T, Undef);
    } else {
      Context.insert<InstFakeDef>(T);
    }

    for (SizeT I = 0; I < NumElements; ++I) {
      auto *Index = Ctx->getConstantInt32(I);

      // Wraps a source operand in a thunk that, when invoked, inserts an
      // extractelement of element Index into a new scalar variable and returns
      // that variable. Deferring the extraction lets applyToThunkedArgs choose
      // the order in which the extract instructions are inserted.
      auto makeExtractThunk = [this, Index, NumElements](Operand *Src) {
        return [this, Index, NumElements, Src]() {
          assert(typeNumElements(Src->getType()) == NumElements);

          const auto ElementTy = typeElementType(Src->getType());
          auto *Op = Func->makeVariable(ElementTy);
          Context.insert<InstExtractElement>(Op, Src, Index);
          return Op;
        };
      };

      // Perform the operation as a scalar operation.
      auto *Res = Func->makeVariable(DestElementTy);
      auto *Arith = applyToThunkedArgs(insertScalarInstruction, Res,
                                       makeExtractThunk(Srcs)...);
      // The scalar instruction itself is passed through the helper-call hook.
      genTargetHelperCallFor(Arith);

      // Each insertelement defines a new vector variable, which becomes the
      // accumulated vector for the next iteration.
      Variable *DestT = Func->makeVariable(DestTy);
      Context.insert<InstInsertElement>(DestT, T, Res, Index);
      T = DestT;
    }
    Context.insert<InstAssign>(Dest, T);
  }

  // applyToThunkedArgs is used by scalarizeInstruction. Ideally, we would just
  // call insertScalarInstruction(Res, Srcs...), but C++ does not specify
  // evaluation order which means this leads to an unpredictable final
  // output. Instead, we wrap each of the Srcs in a thunk and these
  // applyToThunkedArgs functions apply the thunks in a well defined order so we
  // still get well-defined output.
  //
  // There is one overload per source count (one, two, or three sources); each
  // invokes its thunks in order, thunk0 first.
  Inst *applyToThunkedArgs(
      std::function<Inst *(Variable *, Variable *)> insertScalarInstruction,
      Variable *Res, std::function<Variable *()> thunk0) {
    auto *Src0 = thunk0();
    return insertScalarInstruction(Res, Src0);
  }

  Inst *
  applyToThunkedArgs(std::function<Inst *(Variable *, Variable *, Variable *)>
                         insertScalarInstruction,
                     Variable *Res, std::function<Variable *()> thunk0,
                     std::function<Variable *()> thunk1) {
    auto *Src0 = thunk0();
    auto *Src1 = thunk1();
    return insertScalarInstruction(Res, Src0, Src1);
  }

  Inst *applyToThunkedArgs(
      std::function<Inst *(Variable *, Variable *, Variable *, Variable *)>
          insertScalarInstruction,
      Variable *Res, std::function<Variable *()> thunk0,
      std::function<Variable *()> thunk1, std::function<Variable *()> thunk2) {
    auto *Src0 = thunk0();
    auto *Src1 = thunk1();
    auto *Src2 = thunk2();
    return insertScalarInstruction(Res, Src0, Src1, Src2);
  }

  /// SandboxType enumerates the sandboxing strategies known to the target
  /// lowering. ST_None is the default value of SandboxingType.
  enum SandboxType {
    ST_None,
    ST_NaCl,
    ST_Nonsfi,
  };

  static SandboxType determineSandboxTypeFromFlags(const ClFlags &Flags);

  Cfg *Func;
  GlobalContext *Ctx;
  bool HasComputedFrame = false;
  bool CallsReturnsTwice = false;
  // Counters backing makeNextLabelNumber() and makeNextJumpTableNumber().
  SizeT NextLabelNumber = 0;
  SizeT NextJumpTableNumber = 0;
  LoweringContext Context;
  const SandboxType SandboxingType = ST_None;

  const static constexpr char *H_getIP_prefix = "__Sz_getIP_";
};
 611
 612 /// TargetDataLowering is used for "lowering" data including initializers for
 613 /// global variables, and the internal constant pools. It is separated out from
 614 /// TargetLowering because it does not require a Cfg.
 615 class TargetDataLowering {
 616   TargetDataLowering() = delete;
 617   TargetDataLowering(const TargetDataLowering &) = delete;
 618   TargetDataLowering &operator=(const TargetDataLowering &) = delete;
 619
 620 public:
 621   static std::unique_ptr<TargetDataLowering> createLowering(GlobalContext *Ctx);
 622   virtual ~TargetDataLowering();
 623
 624   virtual void lowerGlobals(const VariableDeclarationList &Vars,
 625                             const std::string &SectionSuffix) = 0;
 626   virtual void lowerConstants() = 0;
 627   virtual void lowerJumpTables() = 0;
 628   virtual void emitTargetRODataSections() {}
 629
 630 protected:
 631   void emitGlobal(const VariableDeclaration &Var,
 632                   const std::string &SectionSuffix);
 633
 634   /// For now, we assume .long is the right directive for emitting 4 byte emit
 635   /// global relocations. However, LLVM MIPS usually uses .4byte instead.
 636   /// Perhaps there is some difference when the location is unaligned.
 637   static const char *getEmit32Directive() { return ".long"; }
 638
 639   explicit TargetDataLowering(GlobalContext *Ctx) : Ctx(Ctx) {}
 640   GlobalContext *Ctx;
 641 };
 642
 643 /// TargetHeaderLowering is used to "lower" the header of an output file. It
 644 /// writes out the target-specific header attributes. E.g., for ARM this writes
 645 /// out the build attributes (float ABI, etc.).
 646 class TargetHeaderLowering {
 647   TargetHeaderLowering() = delete;
 648   TargetHeaderLowering(const TargetHeaderLowering &) = delete;
 649   TargetHeaderLowering &operator=(const TargetHeaderLowering &) = delete;
 650
 651 public:
 652   static std::unique_ptr<TargetHeaderLowering>
 653   createLowering(GlobalContext *Ctx);
 654   virtual ~TargetHeaderLowering();
 655
 656   virtual void lower() {}
 657
 658 protected:
 659   explicit TargetHeaderLowering(GlobalContext *Ctx) : Ctx(Ctx) {}
 660   GlobalContext *Ctx;
 661 };
 662
 663 } // end of namespace Ice
 664
 665 #endif // SUBZERO_SRC_ICETARGETLOWERING_H