1 //===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===//
3 // The Subzero Code Generator
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief Declares the TargetLoweringARM32 class, which implements the
12 /// TargetLowering interface for the ARM 32-bit architecture.
14 //===----------------------------------------------------------------------===//
16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGARM32_H
17 #define SUBZERO_SRC_ICETARGETLOWERINGARM32_H
19 #include "IceAssemblerARM32.h"
21 #include "IceInstARM32.h"
22 #include "IceRegistersARM32.h"
23 #include "IceTargetLowering.h"
25 #include "llvm/ADT/SmallBitVector.h"
30 // Class encapsulating ARM cpu features / instruction set.
31 class TargetARM32Features {
32 TargetARM32Features() = delete;
33 TargetARM32Features(const TargetARM32Features &) = delete;
34 TargetARM32Features &operator=(const TargetARM32Features &) = delete;
37 explicit TargetARM32Features(const ClFlags &Flags);
39 enum ARM32InstructionSet {
41 // Neon is the PNaCl baseline instruction set.
43 HWDivArm, // HW divide in ARM mode (not just Thumb mode).
47 bool hasFeature(ARM32InstructionSet I) const { return I <= InstructionSet; }
50 ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin;
53 // The target lowering logic for ARM32.
54 class TargetARM32 : public TargetLowering {
55 TargetARM32() = delete;
56 TargetARM32(const TargetARM32 &) = delete;
57 TargetARM32 &operator=(const TargetARM32 &) = delete;
60 static void staticInit(GlobalContext *Ctx);
61 // TODO(jvoung): return a unique_ptr.
62 static std::unique_ptr<::Ice::TargetLowering> create(Cfg *Func) {
63 return makeUnique<TargetARM32>(Func);
66 std::unique_ptr<::Ice::Assembler> createAssembler() const override {
67 return makeUnique<ARM32::AssemblerARM32>();
70 void initNodeForLowering(CfgNode *Node) override {
71 Computations.forgetProducers();
72 Computations.recordProducers(Node);
73 Computations.dump(Func);
76 void translateOm1() override;
77 void translateO2() override;
78 bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;
80 SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; }
81 Variable *getPhysicalRegister(SizeT RegNum, Type Ty = IceType_void) override;
82 IceString getRegName(SizeT RegNum, Type Ty) const override;
83 llvm::SmallBitVector getRegisterSet(RegSetMask Include,
84 RegSetMask Exclude) const override;
85 const llvm::SmallBitVector &
86 getRegistersForVariable(const Variable *Var) const override {
87 RegClass RC = Var->getRegClass();
88 assert(RC < RC_Target);
89 return TypeToRegisterSet[RC];
91 const llvm::SmallBitVector &
92 getAllRegistersForVariable(const Variable *Var) const override {
93 RegClass RC = Var->getRegClass();
94 assert(RC < RC_Target);
95 return TypeToRegisterSetUnfiltered[RC];
97 const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override {
98 return RegisterAliases[Reg];
100 bool hasFramePointer() const override { return UsesFramePointer; }
101 void setHasFramePointer() override { UsesFramePointer = true; }
102 SizeT getStackReg() const override { return RegARM32::Reg_sp; }
103 SizeT getFrameReg() const override { return RegARM32::Reg_fp; }
104 SizeT getFrameOrStackReg() const override {
105 return UsesFramePointer ? getFrameReg() : getStackReg();
107 int32_t getReservedTmpReg() const { return RegARM32::Reg_ip; }
109 size_t typeWidthInBytesOnStack(Type Ty) const override {
110 // Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16
111 // are rounded up to 4 bytes.
112 return (typeWidthInBytes(Ty) + 3) & ~3;
114 uint32_t getStackAlignment() const override;
115 void reserveFixedAllocaArea(size_t Size, size_t Align) override {
116 FixedAllocaSizeBytes = Size;
117 assert(llvm::isPowerOf2_32(Align));
118 FixedAllocaAlignBytes = Align;
119 PrologEmitsFixedAllocas = true;
121 int32_t getFrameFixedAllocaOffset() const override {
122 return FixedAllocaSizeBytes - (SpillAreaSizeBytes - MaxOutArgsSizeBytes);
124 uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; }
126 bool shouldSplitToVariable64On32(Type Ty) const override {
127 return Ty == IceType_i64;
130 // TODO(ascull): what size is best for ARM?
131 SizeT getMinJumpTableSize() const override { return 3; }
132 void emitJumpTable(const Cfg *Func,
133 const InstJumpTable *JumpTable) const override;
135 void emitVariable(const Variable *Var) const override;
137 void emit(const ConstantUndef *C) const final;
138 void emit(const ConstantInteger32 *C) const final;
139 void emit(const ConstantInteger64 *C) const final;
140 void emit(const ConstantFloat *C) const final;
141 void emit(const ConstantDouble *C) const final;
142 void emit(const ConstantRelocatable *C) const final;
144 void lowerArguments() override;
145 void addProlog(CfgNode *Node) override;
146 void addEpilog(CfgNode *Node) override;
148 Operand *loOperand(Operand *Operand);
149 Operand *hiOperand(Operand *Operand);
150 void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
151 size_t BasicFrameOffset, size_t *InArgsSizeBytes);
153 bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
154 return CPUFeatures.hasFeature(I);
157 enum OperandLegalization {
158 Legal_Reg = 1 << 0, /// physical register, not stack location
159 Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated small
160 /// immediates, shifted registers, or modified fp imm.
161 Legal_Mem = 1 << 2, /// includes [r0, r1 lsl #2] as well as [sp, #12]
162 Legal_Rematerializable = 1 << 3,
163 Legal_Default = ~Legal_Rematerializable,
166 using LegalMask = uint32_t;
167 Operand *legalizeUndef(Operand *From, int32_t RegNum = Variable::NoRegister);
168 Operand *legalize(Operand *From, LegalMask Allowed = Legal_Default,
169 int32_t RegNum = Variable::NoRegister);
170 Variable *legalizeToReg(Operand *From, int32_t RegNum = Variable::NoRegister);
172 OperandARM32ShAmtImm *shAmtImm(uint32_t ShAmtImm) const {
173 assert(ShAmtImm < 32);
174 return OperandARM32ShAmtImm::create(
176 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmtImm & 0x1F)));
179 GlobalContext *getCtx() const { return Ctx; }
182 explicit TargetARM32(Cfg *Func);
184 void postLower() override;
191 void lowerAlloca(const InstAlloca *Inst) override;
192 SafeBoolChain lowerInt1Arithmetic(const InstArithmetic *Inst);
193 void lowerInt64Arithmetic(InstArithmetic::OpKind Op, Variable *Dest,
194 Operand *Src0, Operand *Src1);
195 void lowerArithmetic(const InstArithmetic *Inst) override;
196 void lowerAssign(const InstAssign *Inst) override;
197 void lowerBr(const InstBr *Inst) override;
198 void lowerCall(const InstCall *Inst) override;
199 void lowerCast(const InstCast *Inst) override;
200 void lowerExtractElement(const InstExtractElement *Inst) override;
202 /// CondWhenTrue is a helper type returned by every method in the lowering
203 /// that emits code to set the condition codes.
206 explicit CondWhenTrue(CondARM32::Cond T0,
207 CondARM32::Cond T1 = CondARM32::kNone)
208 : WhenTrue0(T0), WhenTrue1(T1) {
209 assert(T1 == CondARM32::kNone || T0 != CondARM32::kNone);
210 assert(T1 != T0 || T0 == CondARM32::kNone);
212 CondARM32::Cond WhenTrue0;
213 CondARM32::Cond WhenTrue1;
215 /// invert returns a new object with WhenTrue0 and WhenTrue1 inverted.
216 CondWhenTrue invert() const {
219 if (WhenTrue1 == CondARM32::kNone)
220 return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0));
221 return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0),
222 InstARM32::getOppositeCondition(WhenTrue1));
224 return CondWhenTrue(CondARM32::kNone);
225 case CondARM32::kNone:
226 return CondWhenTrue(CondARM32::AL);
231 CondWhenTrue lowerFcmpCond(const InstFcmp *Instr);
232 void lowerFcmp(const InstFcmp *Instr) override;
233 CondWhenTrue lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition,
234 Operand *Src0, Operand *Src1);
235 CondWhenTrue lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
237 CondWhenTrue lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
239 CondWhenTrue lowerIcmpCond(const InstIcmp *Instr);
240 void lowerIcmp(const InstIcmp *Instr) override;
241 void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
243 void lowerIntrinsicCall(const InstIntrinsicCall *Inst) override;
244 void lowerInsertElement(const InstInsertElement *Inst) override;
245 void lowerLoad(const InstLoad *Inst) override;
246 void lowerPhi(const InstPhi *Inst) override;
247 void lowerRet(const InstRet *Inst) override;
248 void lowerSelect(const InstSelect *Inst) override;
249 void lowerStore(const InstStore *Inst) override;
250 void lowerSwitch(const InstSwitch *Inst) override;
251 void lowerUnreachable(const InstUnreachable *Inst) override;
252 void prelowerPhis() override;
253 uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override;
254 void genTargetHelperCallFor(Inst *Instr) override;
255 void doAddressOptLoad() override;
256 void doAddressOptStore() override;
257 void randomlyInsertNop(float Probability,
258 RandomNumberGenerator &RNG) override;
260 OperandARM32Mem *formMemoryOperand(Operand *Ptr, Type Ty);
262 Variable64On32 *makeI64RegPair();
263 Variable *makeReg(Type Ty, int32_t RegNum = Variable::NoRegister);
264 static Type stackSlotType();
265 Variable *copyToReg(Operand *Src, int32_t RegNum = Variable::NoRegister);
266 void alignRegisterPow2(Variable *Reg, uint32_t Align,
267 int32_t TmpRegNum = Variable::NoRegister);
269 /// Returns a vector in a register with the given constant entries.
270 Variable *makeVectorOfZeros(Type Ty, int32_t RegNum = Variable::NoRegister);
273 makeRandomRegisterPermutation(llvm::SmallVectorImpl<int32_t> &Permutation,
274 const llvm::SmallBitVector &ExcludeRegisters,
275 uint64_t Salt) const override;
277 // If a divide-by-zero check is needed, inserts a: test; branch .LSKIP; trap;
278 // .LSKIP: <continuation>. If no check is needed nothing is inserted.
279 void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi);
280 using ExtInstr = void (TargetARM32::*)(Variable *, Variable *,
282 using DivInstr = void (TargetARM32::*)(Variable *, Variable *, Variable *,
284 void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, Operand *Src1,
285 ExtInstr ExtFunc, DivInstr DivFunc, bool IsRemainder);
287 void lowerCLZ(Variable *Dest, Variable *ValLo, Variable *ValHi);
289 // The following are helpers that insert lowered ARM32 instructions with
290 // minimal syntactic overhead, so that the lowering code can look as close to
291 // assembly as practical.
292 void _add(Variable *Dest, Variable *Src0, Operand *Src1,
293 CondARM32::Cond Pred = CondARM32::AL) {
294 Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred);
296 void _adds(Variable *Dest, Variable *Src0, Operand *Src1,
297 CondARM32::Cond Pred = CondARM32::AL) {
298 constexpr bool SetFlags = true;
299 Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred, SetFlags);
301 void _adc(Variable *Dest, Variable *Src0, Operand *Src1,
302 CondARM32::Cond Pred = CondARM32::AL) {
303 Context.insert<InstARM32Adc>(Dest, Src0, Src1, Pred);
305 void _and(Variable *Dest, Variable *Src0, Operand *Src1,
306 CondARM32::Cond Pred = CondARM32::AL) {
307 Context.insert<InstARM32And>(Dest, Src0, Src1, Pred);
309 void _asr(Variable *Dest, Variable *Src0, Operand *Src1,
310 CondARM32::Cond Pred = CondARM32::AL) {
311 Context.insert<InstARM32Asr>(Dest, Src0, Src1, Pred);
313 void _bic(Variable *Dest, Variable *Src0, Operand *Src1,
314 CondARM32::Cond Pred = CondARM32::AL) {
315 Context.insert<InstARM32Bic>(Dest, Src0, Src1, Pred);
317 void _br(CfgNode *TargetTrue, CfgNode *TargetFalse,
318 CondARM32::Cond Condition) {
319 Context.insert<InstARM32Br>(TargetTrue, TargetFalse, Condition);
321 void _br(CfgNode *Target) { Context.insert<InstARM32Br>(Target); }
322 void _br(CfgNode *Target, CondARM32::Cond Condition) {
323 Context.insert<InstARM32Br>(Target, Condition);
325 void _br(InstARM32Label *Label, CondARM32::Cond Condition) {
326 Context.insert<InstARM32Br>(Label, Condition);
328 void _cmn(Variable *Src0, Operand *Src1,
329 CondARM32::Cond Pred = CondARM32::AL) {
330 Context.insert<InstARM32Cmn>(Src0, Src1, Pred);
332 void _cmp(Variable *Src0, Operand *Src1,
333 CondARM32::Cond Pred = CondARM32::AL) {
334 Context.insert<InstARM32Cmp>(Src0, Src1, Pred);
336 void _clz(Variable *Dest, Variable *Src0,
337 CondARM32::Cond Pred = CondARM32::AL) {
338 Context.insert<InstARM32Clz>(Dest, Src0, Pred);
340 void _dmb() { Context.insert<InstARM32Dmb>(); }
341 void _eor(Variable *Dest, Variable *Src0, Operand *Src1,
342 CondARM32::Cond Pred = CondARM32::AL) {
343 Context.insert<InstARM32Eor>(Dest, Src0, Src1, Pred);
345 /// _ldr, for all your memory to Variable data moves. It handles all types
346 /// (integer, floating point, and vectors.) Addr needs to be valid for Dest's
347 /// type (e.g., no immediates for vector loads, and no index registers for fp
349 void _ldr(Variable *Dest, OperandARM32Mem *Addr,
350 CondARM32::Cond Pred = CondARM32::AL) {
351 Context.insert<InstARM32Ldr>(Dest, Addr, Pred);
353 void _ldrex(Variable *Dest, OperandARM32Mem *Addr,
354 CondARM32::Cond Pred = CondARM32::AL) {
355 Context.insert<InstARM32Ldrex>(Dest, Addr, Pred);
356 if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
357 Context.insert<InstFakeDef>(Dest64->getLo(), Dest);
358 Context.insert<InstFakeDef>(Dest64->getHi(), Dest);
361 void _lsl(Variable *Dest, Variable *Src0, Operand *Src1,
362 CondARM32::Cond Pred = CondARM32::AL) {
363 Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred);
365 void _lsls(Variable *Dest, Variable *Src0, Operand *Src1,
366 CondARM32::Cond Pred = CondARM32::AL) {
367 constexpr bool SetFlags = true;
368 Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred, SetFlags);
370 void _lsr(Variable *Dest, Variable *Src0, Operand *Src1,
371 CondARM32::Cond Pred = CondARM32::AL) {
372 Context.insert<InstARM32Lsr>(Dest, Src0, Src1, Pred);
374 void _mla(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
375 CondARM32::Cond Pred = CondARM32::AL) {
376 Context.insert<InstARM32Mla>(Dest, Src0, Src1, Acc, Pred);
378 void _mls(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
379 CondARM32::Cond Pred = CondARM32::AL) {
380 Context.insert<InstARM32Mls>(Dest, Src0, Src1, Acc, Pred);
382 /// _mov, for all your Variable to Variable data movement needs. It handles
383 /// all types (integer, floating point, and vectors), as well as moves between
384 /// Core and VFP registers. This is not a panacea: you must obey the (weird,
385 /// confusing, non-uniform) rules for data moves in ARM.
386 void _mov(Variable *Dest, Operand *Src0,
387 CondARM32::Cond Pred = CondARM32::AL) {
388 // _mov used to be unique in the sense that it would create a temporary
389 // automagically if Dest was nullptr. It won't do that anymore, so we keep
390 // an assert around just in case there is some untested code path where Dest
392 assert(Dest != nullptr);
393 assert(!llvm::isa<OperandARM32Mem>(Src0));
394 auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);
396 if (Instr->isMultiDest()) {
397 // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
398 // fake-def for Instr.DestHi here.
399 assert(llvm::isa<Variable64On32>(Dest));
400 Context.insert<InstFakeDef>(Instr->getDestHi());
404 void _mov_redefined(Variable *Dest, Operand *Src0,
405 CondARM32::Cond Pred = CondARM32::AL) {
406 auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);
407 Instr->setDestRedefined();
408 if (Instr->isMultiDest()) {
409 // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
410 // fake-def for Instr.DestHi here.
411 assert(llvm::isa<Variable64On32>(Dest));
412 Context.insert<InstFakeDef>(Instr->getDestHi());
416 // --------------------------------------------------------------------------
417 // Begin bool folding machinery.
419 // There are three types of boolean lowerings handled by this target:
421 // 1) Boolean expressions leading to a boolean Variable definition
422 // ---------------------------------------------------------------
424 // Whenever a i1 Variable is live out (i.e., its live range extends beyond
425 // the defining basic block) we do not fold the operation. We instead
426 // materialize (i.e., compute) the variable normally, so that it can be used
427 // when needed. We also materialize i1 values that are not single use to
428 // avoid code duplication. These expressions are not short circuited.
430 // 2) Boolean expressions leading to a select
431 // ------------------------------------------
433 // These include boolean chains leading to a select instruction, as well as
434 // i1 Sexts. These boolean expressions are lowered to:
436 // mov T, <false value>
437 // CC <- eval(Boolean Expression)
438 // movCC T, <true value>
440 // For Sexts, <false value> is 0, and <true value> is -1.
442 // 3) Boolean expressions leading to a br i1
443 // -----------------------------------------
445 // These are the boolean chains leading to a branch. These chains are
446 // short-circuited, i.e.:
449 // br i1 A, label %T, label %F
462 // br i1 A, label %T, label %F
472 // Arbitrarily long chains are short circuited, e.g
478 // br i1 I, label %True, label %False
494 // beq %False (bne %True)
496 /// lowerInt1 materializes Boolean to a Variable.
497 SafeBoolChain lowerInt1(Variable *Dest, Operand *Boolean);
499 /// lowerInt1ForSelect generates the following instruction sequence:
501 /// mov T, FalseValue
502 /// CC <- eval(Boolean)
503 /// movCC T, TrueValue
506 /// It is used for lowering select i1, as well as i1 Sext.
507 void lowerInt1ForSelect(Variable *Dest, Operand *Boolean, Operand *TrueValue,
508 Operand *FalseValue);
510 /// LowerInt1BranchTarget is used by lowerIntForBranch. It wraps a CfgNode, or
511 /// an InstARM32Label (but never both) so that, during br i1 lowering, we can
512 /// create auxiliary labels for short circuiting the condition evaluation.
513 class LowerInt1BranchTarget {
515 explicit LowerInt1BranchTarget(CfgNode *const Target)
516 : NodeTarget(Target) {}
517 explicit LowerInt1BranchTarget(InstARM32Label *const Target)
518 : LabelTarget(Target) {}
520 /// createForLabelOrDuplicate will return a new LowerInt1BranchTarget that
521 /// is the exact copy of this if Label is nullptr; otherwise, the returned
522 /// object will wrap Label instead.
523 LowerInt1BranchTarget
524 createForLabelOrDuplicate(InstARM32Label *Label) const {
525 if (Label != nullptr)
526 return LowerInt1BranchTarget(Label);
528 return LowerInt1BranchTarget(NodeTarget);
529 return LowerInt1BranchTarget(LabelTarget);
532 CfgNode *const NodeTarget = nullptr;
533 InstARM32Label *const LabelTarget = nullptr;
536 /// LowerInt1AllowShortCircuit is a helper type used by lowerInt1ForBranch for
537 /// determining which type arithmetic is allowed to be short circuited. This
538 /// is useful for lowering
541 /// t2 = and i1 t1, C
542 /// br i1 t2, label %False, label %True
554 /// Without this information, short circuiting would only allow to short
555 /// circuit a single high level instruction. For example:
558 /// t2 = and i1 t1, C
559 /// br i1 t2, label %False, label %True
561 /// cannot be lowered to
571 /// It needs to be lowered to
582 /// TODO(jpp): evaluate if this kind of short circuiting hurts performance (it
584 enum LowerInt1AllowShortCircuit {
587 SC_All = SC_And | SC_Or,
590 /// ShortCircuitCondAndLabel wraps the condition codes that should be used
591 /// after a lowerInt1ForBranch returns to branch to the
592 /// TrueTarget/FalseTarget. If ShortCircuitLabel is not nullptr, then the
593 /// called lowerInt1forBranch created an internal (i.e., short-circuit) label
594 /// used for short circuiting.
595 class ShortCircuitCondAndLabel {
597 explicit ShortCircuitCondAndLabel(CondWhenTrue &&C,
598 InstARM32Label *L = nullptr)
599 : Cond(std::move(C)), ShortCircuitTarget(L) {}
600 const CondWhenTrue Cond;
601 InstARM32Label *const ShortCircuitTarget;
603 CondWhenTrue assertNoLabelAndReturnCond() const {
604 assert(ShortCircuitTarget == nullptr);
609 /// lowerInt1ForBranch expands Boolean, and returns the condition codes that
610 /// are to be used for branching to the branch's TrueTarget. It may return a
611 /// label that the expansion of Boolean used to short circuit the chain's
613 ShortCircuitCondAndLabel
614 lowerInt1ForBranch(Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
615 const LowerInt1BranchTarget &TargetFalse,
616 uint32_t ShortCircuitable);
618 // _br is a convenience wrapper that emits br instructions to Target.
619 void _br(const LowerInt1BranchTarget &BrTarget,
620 CondARM32::Cond Cond = CondARM32::AL) {
621 assert((BrTarget.NodeTarget == nullptr) !=
622 (BrTarget.LabelTarget == nullptr));
623 if (BrTarget.NodeTarget != nullptr)
624 _br(BrTarget.NodeTarget, Cond);
626 _br(BrTarget.LabelTarget, Cond);
629 // _br_short_circuit is used when lowering InstArithmetic::And and
630 // InstArithmetic::Or and a short circuit branch is needed.
631 void _br_short_circuit(const LowerInt1BranchTarget &Target,
632 const CondWhenTrue &Cond) {
633 if (Cond.WhenTrue1 != CondARM32::kNone) {
634 _br(Target, Cond.WhenTrue1);
636 if (Cond.WhenTrue0 != CondARM32::kNone) {
637 _br(Target, Cond.WhenTrue0);
640 // End of bool folding machinery
641 // --------------------------------------------------------------------------
643 /// The Operand can only be a 16-bit immediate or a ConstantRelocatable (with
644 /// an upper16 relocation).
645 void _movt(Variable *Dest, Operand *Src0,
646 CondARM32::Cond Pred = CondARM32::AL) {
647 Context.insert<InstARM32Movt>(Dest, Src0, Pred);
649 void _movw(Variable *Dest, Operand *Src0,
650 CondARM32::Cond Pred = CondARM32::AL) {
651 Context.insert<InstARM32Movw>(Dest, Src0, Pred);
653 void _mul(Variable *Dest, Variable *Src0, Variable *Src1,
654 CondARM32::Cond Pred = CondARM32::AL) {
655 Context.insert<InstARM32Mul>(Dest, Src0, Src1, Pred);
657 void _mvn(Variable *Dest, Operand *Src0,
658 CondARM32::Cond Pred = CondARM32::AL) {
659 Context.insert<InstARM32Mvn>(Dest, Src0, Pred);
661 void _orr(Variable *Dest, Variable *Src0, Operand *Src1,
662 CondARM32::Cond Pred = CondARM32::AL) {
663 Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred);
665 void _orrs(Variable *Dest, Variable *Src0, Operand *Src1,
666 CondARM32::Cond Pred = CondARM32::AL) {
667 constexpr bool SetFlags = true;
668 Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred, SetFlags);
670 void _push(const VarList &Sources) { Context.insert<InstARM32Push>(Sources); }
671 void _pop(const VarList &Dests) {
672 Context.insert<InstARM32Pop>(Dests);
673 // Mark dests as modified.
674 for (Variable *Dest : Dests)
675 Context.insert<InstFakeDef>(Dest);
677 void _rbit(Variable *Dest, Variable *Src0,
678 CondARM32::Cond Pred = CondARM32::AL) {
679 Context.insert<InstARM32Rbit>(Dest, Src0, Pred);
681 void _rev(Variable *Dest, Variable *Src0,
682 CondARM32::Cond Pred = CondARM32::AL) {
683 Context.insert<InstARM32Rev>(Dest, Src0, Pred);
685 void _ret(Variable *LR, Variable *Src0 = nullptr) {
686 Context.insert<InstARM32Ret>(LR, Src0);
688 void _rscs(Variable *Dest, Variable *Src0, Operand *Src1,
689 CondARM32::Cond Pred = CondARM32::AL) {
690 constexpr bool SetFlags = true;
691 Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred, SetFlags);
693 void _rsc(Variable *Dest, Variable *Src0, Operand *Src1,
694 CondARM32::Cond Pred = CondARM32::AL) {
695 Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred);
697 void _rsbs(Variable *Dest, Variable *Src0, Operand *Src1,
698 CondARM32::Cond Pred = CondARM32::AL) {
699 constexpr bool SetFlags = true;
700 Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred, SetFlags);
702 void _rsb(Variable *Dest, Variable *Src0, Operand *Src1,
703 CondARM32::Cond Pred = CondARM32::AL) {
704 Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred);
706 void _sbc(Variable *Dest, Variable *Src0, Operand *Src1,
707 CondARM32::Cond Pred = CondARM32::AL) {
708 Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred);
710 void _sbcs(Variable *Dest, Variable *Src0, Operand *Src1,
711 CondARM32::Cond Pred = CondARM32::AL) {
712 constexpr bool SetFlags = true;
713 Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred, SetFlags);
715 void _sdiv(Variable *Dest, Variable *Src0, Variable *Src1,
716 CondARM32::Cond Pred = CondARM32::AL) {
717 Context.insert<InstARM32Sdiv>(Dest, Src0, Src1, Pred);
719 /// _str, for all your Variable to memory transfers. Addr has the same
720 /// restrictions that it does in _ldr.
721 void _str(Variable *Value, OperandARM32Mem *Addr,
722 CondARM32::Cond Pred = CondARM32::AL) {
723 Context.insert<InstARM32Str>(Value, Addr, Pred);
725 void _strex(Variable *Dest, Variable *Value, OperandARM32Mem *Addr,
726 CondARM32::Cond Pred = CondARM32::AL) {
727 // strex requires Dest to be a register other than Value or Addr. This
728 // restriction is cleanly represented by adding an "early" definition of
729 // Dest (or a latter use of all the sources.)
730 Context.insert<InstFakeDef>(Dest);
731 if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) {
732 Context.insert<InstFakeUse>(Value64->getLo());
733 Context.insert<InstFakeUse>(Value64->getHi());
735 auto *Instr = Context.insert<InstARM32Strex>(Dest, Value, Addr, Pred);
736 Instr->setDestRedefined();
738 void _sub(Variable *Dest, Variable *Src0, Operand *Src1,
739 CondARM32::Cond Pred = CondARM32::AL) {
740 Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred);
742 void _subs(Variable *Dest, Variable *Src0, Operand *Src1,
743 CondARM32::Cond Pred = CondARM32::AL) {
744 constexpr bool SetFlags = true;
745 Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred, SetFlags);
747 void _sxt(Variable *Dest, Variable *Src0,
748 CondARM32::Cond Pred = CondARM32::AL) {
749 Context.insert<InstARM32Sxt>(Dest, Src0, Pred);
751 void _tst(Variable *Src0, Operand *Src1,
752 CondARM32::Cond Pred = CondARM32::AL) {
753 Context.insert<InstARM32Tst>(Src0, Src1, Pred);
755 void _trap() { Context.insert<InstARM32Trap>(); }
756 void _udiv(Variable *Dest, Variable *Src0, Variable *Src1,
757 CondARM32::Cond Pred = CondARM32::AL) {
758 Context.insert<InstARM32Udiv>(Dest, Src0, Src1, Pred);
760 void _umull(Variable *DestLo, Variable *DestHi, Variable *Src0,
761 Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) {
762 Context.insert<InstARM32Umull>(DestLo, DestHi, Src0, Src1, Pred);
763 // Model the modification to the second dest as a fake def. Note that the
764 // def is not predicated.
765 Context.insert<InstFakeDef>(DestHi, DestLo);
767 void _uxt(Variable *Dest, Variable *Src0,
768 CondARM32::Cond Pred = CondARM32::AL) {
769 Context.insert<InstARM32Uxt>(Dest, Src0, Pred);
771 void _vabs(Variable *Dest, Variable *Src,
772 CondARM32::Cond Pred = CondARM32::AL) {
773 Context.insert<InstARM32Vabs>(Dest, Src, Pred);
775 void _vadd(Variable *Dest, Variable *Src0, Variable *Src1) {
776 Context.insert<InstARM32Vadd>(Dest, Src0, Src1);
778 void _vand(Variable *Dest, Variable *Src0, Variable *Src1) {
779 Context.insert<InstARM32Vand>(Dest, Src0, Src1);
781 void _vcvt(Variable *Dest, Variable *Src, InstARM32Vcvt::VcvtVariant Variant,
782 CondARM32::Cond Pred = CondARM32::AL) {
783 Context.insert<InstARM32Vcvt>(Dest, Src, Variant, Pred);
785 void _vdiv(Variable *Dest, Variable *Src0, Variable *Src1) {
786 Context.insert<InstARM32Vdiv>(Dest, Src0, Src1);
788 void _vcmp(Variable *Src0, Variable *Src1,
789 CondARM32::Cond Pred = CondARM32::AL) {
790 Context.insert<InstARM32Vcmp>(Src0, Src1, Pred);
792 void _vcmp(Variable *Src0, OperandARM32FlexFpZero *FpZero,
793 CondARM32::Cond Pred = CondARM32::AL) {
794 Context.insert<InstARM32Vcmp>(Src0, FpZero, Pred);
796 void _veor(Variable *Dest, Variable *Src0, Variable *Src1) {
797 Context.insert<InstARM32Veor>(Dest, Src0, Src1);
799 void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
800 Context.insert<InstARM32Vmrs>(Pred);
802 void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) {
803 Context.insert<InstARM32Vmla>(Dest, Src0, Src1);
805 void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) {
806 Context.insert<InstARM32Vmls>(Dest, Src0, Src1);
808 void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
809 Context.insert<InstARM32Vmul>(Dest, Src0, Src1);
811 void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) {
812 Context.insert<InstARM32Vorr>(Dest, Src0, Src1);
814 void _vsqrt(Variable *Dest, Variable *Src,
815 CondARM32::Cond Pred = CondARM32::AL) {
816 Context.insert<InstARM32Vsqrt>(Dest, Src, Pred);
818 void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) {
819 Context.insert<InstARM32Vsub>(Dest, Src0, Src1);
822 // Iterates over the CFG and determines the maximum outgoing stack arguments
823 // bytes. This information is later used during addProlog() to pre-allocate
825 // TODO(jpp): This could live in the Parser, if we provided a Target-specific
826 // method that the Parser could call.
827 void findMaxStackOutArgsSize();
829 /// Returns true if the given Offset can be represented in a Load/Store Mem
831 bool isLegalMemOffset(Type Ty, int32_t Offset) const;
833 void postLowerLegalization();
835 /// Sandboxer defines methods for ensuring that "dangerous" operations are
836 /// masked during sandboxed code emission. For regular, non-sandboxed code
837 /// emission, its methods are simple pass-through methods.
839 /// The Sandboxer also emits BundleLock/BundleUnlock pseudo-instructions
840 /// in the constructor/destructor during sandboxed code emission. Therefore,
841 /// it is a bad idea to create an object of this type and "keep it around."
842 /// The recommended usage is:
844 /// AutoSandboxing(this).<<operation>>(...);
846 /// This usage ensures that no other instructions are inadvertently added to
849 Sandboxer() = delete;
850 Sandboxer(const Sandboxer &) = delete;
851 Sandboxer &operator=(const Sandboxer &) = delete;
856 InstBundleLock::Option BundleOption = InstBundleLock::Opt_None);
861 /// add sp, sp, AddAmount
862 /// bic sp, sp, 0xc0000000
864 /// (for the rationale, see the ARM 32-bit Sandbox Specification.)
865 void add_sp(Operand *AddAmount);
867 /// Emits code to align sp to the specified alignment:
869 /// bic/and sp, sp, Alignment
870 /// bic, sp, sp, 0xc0000000
871 void align_sp(size_t Alignment);
873 /// Emits a call instruction. If CallTarget is a Variable, it emits
875 /// bic CallTarget, CallTarget, 0xc000000f
878 /// Otherwise, it emits
882 /// Note: in sandboxed code calls are always emitted at addresses 12 mod 16.
883 InstARM32Call *bl(Variable *ReturnReg, Operand *CallTarget);
887 /// bic rBase, rBase, 0xc0000000
888 /// ldr rDest, [rBase, #Offset]
890 /// Exception: if rBase is r9 or sp, then the load is emitted as:
892 /// ldr rDest, [rBase, #Offset]
894 /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
896 void ldr(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred);
898 /// Emits a load exclusive:
900 /// bic rBase, rBase, 0xc0000000
901 /// ldrex rDest, [rBase]
903 /// Exception: if rBase is r9 or sp, then the load is emitted as:
905 /// ldrex rDest, [rBase]
907 /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
909 void ldrex(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred);
911 /// Resets sp to Src:
914 /// bic sp, sp, 0xc0000000
915 void reset_sp(Variable *Src);
917 /// Emits code to return from a function:
919 /// bic lr, lr, 0xc000000f
921 void ret(Variable *RetAddr, Variable *RetValue);
925 /// bic rBase, rBase, 0xc0000000
926 /// str rSrc, [rBase, #Offset]
928 /// Exception: if rBase is r9 or sp, then the store is emitted as:
930 /// str rSrc, [rBase, #Offset]
932 /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
934 void str(Variable *Src, OperandARM32Mem *Mem, CondARM32::Cond Pred);
936 /// Emits a store exclusive:
938 /// bic rBase, rBase, 0xc0000000
939 /// strex rDest, rSrc, [rBase]
941 /// Exception: if rBase is r9 or sp, then the store is emitted as:
943 /// strex rDest, rSrc, [rBase]
945 /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
947 void strex(Variable *Dest, Variable *Src, OperandARM32Mem *Mem,
948 CondARM32::Cond Pred);
952 /// sub sp, sp, SubAmount
953 /// bic sp, sp, 0xc0000000
954 void sub_sp(Operand *SubAmount);
961 class PostLoweringLegalizer {
962 PostLoweringLegalizer() = delete;
963 PostLoweringLegalizer(const PostLoweringLegalizer &) = delete;
964 PostLoweringLegalizer &operator=(const PostLoweringLegalizer &) = delete;
967 explicit PostLoweringLegalizer(TargetARM32 *Target)
968 : Target(Target), StackOrFrameReg(Target->getPhysicalRegister(
969 Target->getFrameOrStackReg())) {}
971 void resetTempBaseIfClobberedBy(const Inst *Instr);
973 // Ensures that the TempBase register held by this legalizer (if any) is
975 void assertNoTempOrAssignedToIP() const {
976 assert(TempBaseReg == nullptr ||
977 TempBaseReg->getRegNum() == Target->getReservedTmpReg());
980 // Legalizes Mem. If Mem.Base is a Rematerializable variable, Mem.Offset is
982 OperandARM32Mem *legalizeMemOperand(OperandARM32Mem *Mem,
983 bool AllowOffsets = true);
985 /// Legalizes Mov if its Source (or Destination) is a spilled Variable, or
986 /// if its Source is a Rematerializable variable (this form is used in lieu
987 /// of lea, which is not available in ARM.)
989 /// Moves to memory become store instructions, and moves from memory, loads.
990 void legalizeMov(InstARM32Mov *Mov);
993 /// Creates a new Base register centered around [Base, +/- Offset].
994 Variable *newBaseRegister(Variable *Base, int32_t Offset,
995 int32_t ScratchRegNum);
997 /// Creates a new, legal OperandARM32Mem for accessing Base + Offset.
998 /// The returned mem operand is a legal operand for accessing memory that is
1001 /// If [Base, #Offset] is encodable, then the method returns a Mem operand
1002 /// expressing it. Otherwise,
1004 /// if [TempBaseReg, #Offset-TempBaseOffset] is a valid memory operand, the
1005 /// method will return that. Otherwise,
1007 /// a new base register ip=Base+Offset is created, and the method returns a
1008 /// memory operand expressing [ip, #0].
1009 OperandARM32Mem *createMemOperand(Type Ty, Variable *Base, int32_t Offset,
1010 bool AllowOffsets = true);
1011 TargetARM32 *const Target;
1012 Variable *const StackOrFrameReg;
1013 Variable *TempBaseReg = nullptr;
1014 int32_t TempBaseOffset = 0;
1017 const bool NeedSandboxing;
1018 TargetARM32Features CPUFeatures;
1019 bool UsesFramePointer = false;
1020 bool NeedsStackAlignment = false;
1021 bool MaybeLeafFunc = true;
1022 size_t SpillAreaSizeBytes = 0;
1023 size_t FixedAllocaSizeBytes = 0;
1024 size_t FixedAllocaAlignBytes = 0;
1025 bool PrologEmitsFixedAllocas = false;
1026 uint32_t MaxOutArgsSizeBytes = 0;
1027 // TODO(jpp): std::array instead of array.
1028 static llvm::SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM];
1029 static llvm::SmallBitVector
1030 TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
1031 static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
1032 llvm::SmallBitVector RegsUsed;
1033 VarList PhysicalRegisters[IceType_NUM];
1034 VarList PreservedGPRs;
1035 VarList PreservedSRegs;
1037 /// Helper class that understands the Calling Convention and register
1038 /// assignments. The first few integer type parameters can use r0-r3,
1039 /// regardless of their position relative to the floating-point/vector
1040 /// arguments in the argument list. Floating-point and vector arguments
1041 /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic,
1042 /// see the ARM Architecture Procedure Calling Standards (AAPCS).
1044 /// Technically, arguments that can start with registers but extend beyond the
1045 /// available registers can be split between the registers and the stack.
1046 /// However, this is typically for passing GPR structs by value, and PNaCl
1047 /// transforms expand this out.
1049 /// At (public) function entry, the stack must be 8-byte aligned.
1051 CallingConv(const CallingConv &) = delete;
1052 CallingConv &operator=(const CallingConv &) = delete;
1056 ~CallingConv() = default;
1058 /// argInGPR returns true if there is a GPR available for the requested
1059 /// type, and false otherwise. If it returns true, Reg is set to the
1060 /// appropriate register number. Note that, when Ty == IceType_i64, Reg will
1061 /// be an I64 register pair.
1062 bool argInGPR(Type Ty, int32_t *Reg);
1064 /// argInVFP is to floating-point/vector types what argInGPR is for integer
1066 bool argInVFP(Type Ty, int32_t *Reg);
1069 void discardUnavailableGPRsAndTheirAliases(CfgVector<SizeT> *Regs);
1070 llvm::SmallBitVector GPRegsUsed;
1071 CfgVector<SizeT> GPRArgs;
1072 CfgVector<SizeT> I64Args;
1074 void discardUnavailableVFPRegs(CfgVector<SizeT> *Regs);
1075 llvm::SmallBitVector VFPRegsUsed;
1076 CfgVector<SizeT> FP32Args;
1077 CfgVector<SizeT> FP64Args;
1078 CfgVector<SizeT> Vec128Args;
1084 OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt,
1087 void postambleCtpop64(const InstCall *Instr);
1088 void preambleDivRem(const InstCall *Instr);
1089 std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Inst)>
1090 ARM32HelpersPreamble;
1091 std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Inst)>
1092 ARM32HelpersPostamble;
1094 class ComputationTracker {
1096 ComputationTracker() = default;
1097 ~ComputationTracker() = default;
1099 void forgetProducers() { KnownComputations.clear(); }
1100 void recordProducers(CfgNode *Node);
1102 const Inst *getProducerOf(const Operand *Opnd) const {
1103 auto *Var = llvm::dyn_cast<Variable>(Opnd);
1104 if (Var == nullptr) {
1108 auto Iter = KnownComputations.find(Var->getIndex());
1109 if (Iter == KnownComputations.end()) {
1113 return Iter->second.Instr;
1116 void dump(const Cfg *Func) const {
1117 if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
1119 OstreamLocker L(Func->getContext());
1120 Ostream &Str = Func->getContext()->getStrDump();
1121 Str << "foldable producer:\n";
1122 for (const auto &Computation : KnownComputations) {
1124 Computation.second.Instr->dump(Func);
1131 class ComputationEntry {
1133 ComputationEntry(Inst *I, Type Ty) : Instr(I), ComputationType(Ty) {}
1135 // Boolean folding is disabled for variables whose live range is multi
1136 // block. We conservatively initialize IsLiveOut to true, and set it to
1137 // false once we find the end of the live range for the variable defined
1138 // by this instruction. If liveness analysis is not performed (e.g., in
1139 // Om1 mode) IsLiveOut will never be set to false, and folding will be
1141 bool IsLiveOut = true;
1142 int32_t NumUses = 0;
1143 Type ComputationType;
1146 // ComputationMap maps a Variable number to a payload identifying which
1147 // instruction defined it.
1148 using ComputationMap = std::unordered_map<SizeT, ComputationEntry>;
1149 ComputationMap KnownComputations;
1152 ComputationTracker Computations;
1154 // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked
1155 // without specifying a physical register. This is needed for creating unbound
1156 // temporaries during Ice -> ARM lowering, but before register allocation.
1157 // This is a safe-guard that no unbound temporaries are created during the
1158 // legalization post-passes.
1159 bool AllowTemporaryWithNoReg = true;
1160 // ForbidTemporaryWithoutReg is a RAII class that manages
1161 // AllowTemporaryWithNoReg.
1162 class ForbidTemporaryWithoutReg {
1163 ForbidTemporaryWithoutReg() = delete;
1164 ForbidTemporaryWithoutReg(const ForbidTemporaryWithoutReg &) = delete;
1165 ForbidTemporaryWithoutReg &
1166 operator=(const ForbidTemporaryWithoutReg &) = delete;
1169 explicit ForbidTemporaryWithoutReg(TargetARM32 *Target) : Target(Target) {
1170 Target->AllowTemporaryWithNoReg = false;
1172 ~ForbidTemporaryWithoutReg() { Target->AllowTemporaryWithNoReg = true; }
1175 TargetARM32 *const Target;
1179 class TargetDataARM32 final : public TargetDataLowering {
1180 TargetDataARM32() = delete;
1181 TargetDataARM32(const TargetDataARM32 &) = delete;
1182 TargetDataARM32 &operator=(const TargetDataARM32 &) = delete;
1185 static std::unique_ptr<TargetDataLowering> create(GlobalContext *Ctx) {
1186 return std::unique_ptr<TargetDataLowering>(new TargetDataARM32(Ctx));
1189 void lowerGlobals(const VariableDeclarationList &Vars,
1190 const IceString &SectionSuffix) override;
1191 void lowerConstants() override;
1192 void lowerJumpTables() override;
1195 explicit TargetDataARM32(GlobalContext *Ctx);
1198 ~TargetDataARM32() override = default;
1201 class TargetHeaderARM32 final : public TargetHeaderLowering {
1202 TargetHeaderARM32() = delete;
1203 TargetHeaderARM32(const TargetHeaderARM32 &) = delete;
1204 TargetHeaderARM32 &operator=(const TargetHeaderARM32 &) = delete;
1207 static std::unique_ptr<TargetHeaderLowering> create(GlobalContext *Ctx) {
1208 return std::unique_ptr<TargetHeaderLowering>(new TargetHeaderARM32(Ctx));
1211 void lower() override;
1214 explicit TargetHeaderARM32(GlobalContext *Ctx);
1217 ~TargetHeaderARM32() = default;
1219 TargetARM32Features CPUFeatures;
1222 } // end of namespace ARM32
1223 } // end of namespace Ice
1225 #endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H