1 //===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===//
3 // The Subzero Code Generator
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief Declares the TargetLoweringARM32 class, which implements the
12 /// TargetLowering interface for the ARM 32-bit architecture.
14 //===----------------------------------------------------------------------===//
16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGARM32_H
17 #define SUBZERO_SRC_ICETARGETLOWERINGARM32_H
19 #include "IceAssemblerARM32.h"
21 #include "IceInstARM32.h"
22 #include "IceRegistersARM32.h"
23 #include "IceTargetLowering.h"
25 #include "llvm/ADT/SmallBitVector.h"
30 // Class encapsulating ARM cpu features / instruction set.
31 class TargetARM32Features {
32 TargetARM32Features() = delete;
33 TargetARM32Features(const TargetARM32Features &) = delete;
34 TargetARM32Features &operator=(const TargetARM32Features &) = delete;
37 explicit TargetARM32Features(const ClFlags &Flags);
39 enum ARM32InstructionSet {
41 // Neon is the PNaCl baseline instruction set.
43 HWDivArm, // HW divide in ARM mode (not just Thumb mode).
47 bool hasFeature(ARM32InstructionSet I) const { return I <= InstructionSet; }
50 ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin;
53 // The target lowering logic for ARM32.
54 class TargetARM32 : public TargetLowering {
55 TargetARM32() = delete;
56 TargetARM32(const TargetARM32 &) = delete;
57 TargetARM32 &operator=(const TargetARM32 &) = delete;
60 static void staticInit(GlobalContext *Ctx);
61 // TODO(jvoung): return a unique_ptr.
62 static std::unique_ptr<::Ice::TargetLowering> create(Cfg *Func) {
63 return makeUnique<TargetARM32>(Func);
66 std::unique_ptr<::Ice::Assembler> createAssembler() const override {
67 return makeUnique<ARM32::AssemblerARM32>();
70 void initNodeForLowering(CfgNode *Node) override {
71 Computations.forgetProducers();
72 Computations.recordProducers(Node);
73 Computations.dump(Func);
76 void translateOm1() override;
77 void translateO2() override;
78 bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;
80 SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; }
81 Variable *getPhysicalRegister(SizeT RegNum, Type Ty = IceType_void) override;
82 IceString getRegName(SizeT RegNum, Type Ty) const override;
83 llvm::SmallBitVector getRegisterSet(RegSetMask Include,
84 RegSetMask Exclude) const override;
85 const llvm::SmallBitVector &
86 getRegistersForVariable(const Variable *Var) const override {
87 RegClass RC = Var->getRegClass();
88 assert(RC < RC_Target);
89 return TypeToRegisterSet[RC];
91 const llvm::SmallBitVector &
92 getAllRegistersForVariable(const Variable *Var) const override {
93 RegClass RC = Var->getRegClass();
94 assert(RC < RC_Target);
95 return TypeToRegisterSetUnfiltered[RC];
97 const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override {
98 return RegisterAliases[Reg];
100 bool hasFramePointer() const override { return UsesFramePointer; }
101 void setHasFramePointer() override { UsesFramePointer = true; }
102 SizeT getStackReg() const override { return RegARM32::Reg_sp; }
103 SizeT getFrameReg() const override { return RegARM32::Reg_fp; }
104 SizeT getFrameOrStackReg() const override {
105 return UsesFramePointer ? getFrameReg() : getStackReg();
107 int32_t getReservedTmpReg() const { return RegARM32::Reg_ip; }
109 size_t typeWidthInBytesOnStack(Type Ty) const override {
110 // Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16
111 // are rounded up to 4 bytes.
112 return (typeWidthInBytes(Ty) + 3) & ~3;
114 uint32_t getStackAlignment() const override;
115 void reserveFixedAllocaArea(size_t Size, size_t Align) override {
116 FixedAllocaSizeBytes = Size;
117 assert(llvm::isPowerOf2_32(Align));
118 FixedAllocaAlignBytes = Align;
119 PrologEmitsFixedAllocas = true;
121 int32_t getFrameFixedAllocaOffset() const override {
122 return FixedAllocaSizeBytes - (SpillAreaSizeBytes - MaxOutArgsSizeBytes);
124 uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; }
126 bool shouldSplitToVariable64On32(Type Ty) const override {
127 return Ty == IceType_i64;
130 // TODO(ascull): what size is best for ARM?
131 SizeT getMinJumpTableSize() const override { return 3; }
132 void emitJumpTable(const Cfg *Func,
133 const InstJumpTable *JumpTable) const override;
135 void emitVariable(const Variable *Var) const override;
137 void emit(const ConstantUndef *C) const final;
138 void emit(const ConstantInteger32 *C) const final;
139 void emit(const ConstantInteger64 *C) const final;
140 void emit(const ConstantFloat *C) const final;
141 void emit(const ConstantDouble *C) const final;
142 void emit(const ConstantRelocatable *C) const final;
144 void lowerArguments() override;
145 void addProlog(CfgNode *Node) override;
146 void addEpilog(CfgNode *Node) override;
148 Operand *loOperand(Operand *Operand);
149 Operand *hiOperand(Operand *Operand);
150 void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
151 size_t BasicFrameOffset, size_t *InArgsSizeBytes);
153 bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
154 return CPUFeatures.hasFeature(I);
157 enum OperandLegalization {
158 Legal_Reg = 1 << 0, /// physical register, not stack location
159 Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated small
160 /// immediates, shifted registers, or modified fp imm.
161 Legal_Mem = 1 << 2, /// includes [r0, r1 lsl #2] as well as [sp, #12]
162 Legal_Rematerializable = 1 << 3,
163 Legal_Default = ~Legal_Rematerializable,
166 using LegalMask = uint32_t;
167 Operand *legalizeUndef(Operand *From, int32_t RegNum = Variable::NoRegister);
168 Operand *legalize(Operand *From, LegalMask Allowed = Legal_Default,
169 int32_t RegNum = Variable::NoRegister);
170 Variable *legalizeToReg(Operand *From, int32_t RegNum = Variable::NoRegister);
172 OperandARM32ShAmtImm *shAmtImm(uint32_t ShAmtImm) const {
173 assert(ShAmtImm < 32);
174 return OperandARM32ShAmtImm::create(
176 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmtImm & 0x1F)));
179 GlobalContext *getCtx() const { return Ctx; }
182 explicit TargetARM32(Cfg *Func);
184 void postLower() override;
191 void lowerAlloca(const InstAlloca *Inst) override;
192 SafeBoolChain lowerInt1Arithmetic(const InstArithmetic *Inst);
193 void lowerInt64Arithmetic(InstArithmetic::OpKind Op, Variable *Dest,
194 Operand *Src0, Operand *Src1);
195 void lowerArithmetic(const InstArithmetic *Inst) override;
196 void lowerAssign(const InstAssign *Inst) override;
197 void lowerBr(const InstBr *Inst) override;
198 void lowerCall(const InstCall *Inst) override;
199 void lowerCast(const InstCast *Inst) override;
200 void lowerExtractElement(const InstExtractElement *Inst) override;
202 /// CondWhenTrue is a helper type returned by every method in the lowering
203 /// that emits code to set the condition codes.
206 explicit CondWhenTrue(CondARM32::Cond T0,
207 CondARM32::Cond T1 = CondARM32::kNone)
208 : WhenTrue0(T0), WhenTrue1(T1) {
209 assert(T1 == CondARM32::kNone || T0 != CondARM32::kNone);
210 assert(T1 != T0 || T0 == CondARM32::kNone);
212 CondARM32::Cond WhenTrue0;
213 CondARM32::Cond WhenTrue1;
215 /// invert returns a new object with WhenTrue0 and WhenTrue1 inverted.
216 CondWhenTrue invert() const {
219 if (WhenTrue1 == CondARM32::kNone)
220 return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0));
221 return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0),
222 InstARM32::getOppositeCondition(WhenTrue1));
224 return CondWhenTrue(CondARM32::kNone);
225 case CondARM32::kNone:
226 return CondWhenTrue(CondARM32::AL);
231 CondWhenTrue lowerFcmpCond(const InstFcmp *Instr);
232 void lowerFcmp(const InstFcmp *Instr) override;
233 CondWhenTrue lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition,
234 Operand *Src0, Operand *Src1);
235 CondWhenTrue lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
237 CondWhenTrue lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
239 CondWhenTrue lowerIcmpCond(const InstIcmp *Instr);
240 void lowerIcmp(const InstIcmp *Instr) override;
241 void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
243 void lowerIntrinsicCall(const InstIntrinsicCall *Inst) override;
244 void lowerInsertElement(const InstInsertElement *Inst) override;
245 void lowerLoad(const InstLoad *Inst) override;
246 void lowerPhi(const InstPhi *Inst) override;
247 void lowerRet(const InstRet *Inst) override;
248 void lowerSelect(const InstSelect *Inst) override;
249 void lowerStore(const InstStore *Inst) override;
250 void lowerSwitch(const InstSwitch *Inst) override;
251 void lowerUnreachable(const InstUnreachable *Inst) override;
252 void prelowerPhis() override;
253 uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override;
254 void genTargetHelperCallFor(Inst *Instr) override;
255 void doAddressOptLoad() override;
256 void doAddressOptStore() override;
257 void randomlyInsertNop(float Probability,
258 RandomNumberGenerator &RNG) override;
260 OperandARM32Mem *formMemoryOperand(Operand *Ptr, Type Ty);
262 Variable64On32 *makeI64RegPair();
263 Variable *makeReg(Type Ty, int32_t RegNum = Variable::NoRegister);
264 static Type stackSlotType();
265 Variable *copyToReg(Operand *Src, int32_t RegNum = Variable::NoRegister);
266 void alignRegisterPow2(Variable *Reg, uint32_t Align,
267 int32_t TmpRegNum = Variable::NoRegister);
269 /// Returns a vector in a register with the given constant entries.
270 Variable *makeVectorOfZeros(Type Ty, int32_t RegNum = Variable::NoRegister);
273 makeRandomRegisterPermutation(llvm::SmallVectorImpl<int32_t> &Permutation,
274 const llvm::SmallBitVector &ExcludeRegisters,
275 uint64_t Salt) const override;
277 // If a divide-by-zero check is needed, inserts a: test; branch .LSKIP; trap;
278 // .LSKIP: <continuation>. If no check is needed nothing is inserted.
279 void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi);
280 using ExtInstr = void (TargetARM32::*)(Variable *, Variable *,
282 using DivInstr = void (TargetARM32::*)(Variable *, Variable *, Variable *,
284 void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, Operand *Src1,
285 ExtInstr ExtFunc, DivInstr DivFunc, bool IsRemainder);
287 void lowerCLZ(Variable *Dest, Variable *ValLo, Variable *ValHi);
289 // The following are helpers that insert lowered ARM32 instructions with
290 // minimal syntactic overhead, so that the lowering code can look as close to
291 // assembly as practical.
292 void _add(Variable *Dest, Variable *Src0, Operand *Src1,
293 CondARM32::Cond Pred = CondARM32::AL) {
294 Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred);
296 void _adds(Variable *Dest, Variable *Src0, Operand *Src1,
297 CondARM32::Cond Pred = CondARM32::AL) {
298 constexpr bool SetFlags = true;
299 Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred, SetFlags);
301 void _adc(Variable *Dest, Variable *Src0, Operand *Src1,
302 CondARM32::Cond Pred = CondARM32::AL) {
303 Context.insert<InstARM32Adc>(Dest, Src0, Src1, Pred);
305 void _and(Variable *Dest, Variable *Src0, Operand *Src1,
306 CondARM32::Cond Pred = CondARM32::AL) {
307 Context.insert<InstARM32And>(Dest, Src0, Src1, Pred);
309 void _asr(Variable *Dest, Variable *Src0, Operand *Src1,
310 CondARM32::Cond Pred = CondARM32::AL) {
311 Context.insert<InstARM32Asr>(Dest, Src0, Src1, Pred);
313 void _bic(Variable *Dest, Variable *Src0, Operand *Src1,
314 CondARM32::Cond Pred = CondARM32::AL) {
315 Context.insert<InstARM32Bic>(Dest, Src0, Src1, Pred);
317 void _br(CfgNode *TargetTrue, CfgNode *TargetFalse,
318 CondARM32::Cond Condition) {
319 Context.insert<InstARM32Br>(TargetTrue, TargetFalse, Condition);
321 void _br(CfgNode *Target) { Context.insert<InstARM32Br>(Target); }
322 void _br(CfgNode *Target, CondARM32::Cond Condition) {
323 Context.insert<InstARM32Br>(Target, Condition);
325 void _br(InstARM32Label *Label, CondARM32::Cond Condition) {
326 Context.insert<InstARM32Br>(Label, Condition);
328 void _cmn(Variable *Src0, Operand *Src1,
329 CondARM32::Cond Pred = CondARM32::AL) {
330 Context.insert<InstARM32Cmn>(Src0, Src1, Pred);
332 void _cmp(Variable *Src0, Operand *Src1,
333 CondARM32::Cond Pred = CondARM32::AL) {
334 Context.insert<InstARM32Cmp>(Src0, Src1, Pred);
336 void _clz(Variable *Dest, Variable *Src0,
337 CondARM32::Cond Pred = CondARM32::AL) {
338 Context.insert<InstARM32Clz>(Dest, Src0, Pred);
340 void _dmb() { Context.insert<InstARM32Dmb>(); }
341 void _eor(Variable *Dest, Variable *Src0, Operand *Src1,
342 CondARM32::Cond Pred = CondARM32::AL) {
343 Context.insert<InstARM32Eor>(Dest, Src0, Src1, Pred);
345 /// _ldr, for all your memory to Variable data moves. It handles all types
346 /// (integer, floating point, and vectors.) Addr needs to be valid for Dest's
347 /// type (e.g., no immediates for vector loads, and no index registers for fp
349 void _ldr(Variable *Dest, OperandARM32Mem *Addr,
350 CondARM32::Cond Pred = CondARM32::AL) {
351 Context.insert<InstARM32Ldr>(Dest, Addr, Pred);
353 void _ldrex(Variable *Dest, OperandARM32Mem *Addr,
354 CondARM32::Cond Pred = CondARM32::AL) {
355 Context.insert<InstARM32Ldrex>(Dest, Addr, Pred);
356 if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
357 Context.insert<InstFakeDef>(Dest64->getLo(), Dest);
358 Context.insert<InstFakeDef>(Dest64->getHi(), Dest);
361 void _lsl(Variable *Dest, Variable *Src0, Operand *Src1,
362 CondARM32::Cond Pred = CondARM32::AL) {
363 Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred);
365 void _lsls(Variable *Dest, Variable *Src0, Operand *Src1,
366 CondARM32::Cond Pred = CondARM32::AL) {
367 constexpr bool SetFlags = true;
368 Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred, SetFlags);
370 void _lsr(Variable *Dest, Variable *Src0, Operand *Src1,
371 CondARM32::Cond Pred = CondARM32::AL) {
372 Context.insert<InstARM32Lsr>(Dest, Src0, Src1, Pred);
374 void _mla(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
375 CondARM32::Cond Pred = CondARM32::AL) {
376 Context.insert<InstARM32Mla>(Dest, Src0, Src1, Acc, Pred);
378 void _mls(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
379 CondARM32::Cond Pred = CondARM32::AL) {
380 Context.insert<InstARM32Mls>(Dest, Src0, Src1, Acc, Pred);
382 /// _mov, for all your Variable to Variable data movement needs. It handles
383 /// all types (integer, floating point, and vectors), as well as moves between
384 /// Core and VFP registers. This is not a panacea: you must obey the (weird,
385 /// confusing, non-uniform) rules for data moves in ARM.
386 void _mov(Variable *Dest, Operand *Src0,
387 CondARM32::Cond Pred = CondARM32::AL) {
388 // _mov used to be unique in the sense that it would create a temporary
389 // automagically if Dest was nullptr. It won't do that anymore, so we keep
390 // an assert around just in case there is some untested code path where Dest
392 assert(Dest != nullptr);
393 assert(!llvm::isa<OperandARM32Mem>(Src0));
394 auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);
396 if (Instr->isMultiDest()) {
397 // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
398 // fake-def for Instr.DestHi here.
399 assert(llvm::isa<Variable64On32>(Dest));
400 Context.insert<InstFakeDef>(Instr->getDestHi());
404 void _mov_redefined(Variable *Dest, Operand *Src0,
405 CondARM32::Cond Pred = CondARM32::AL) {
406 auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);
407 Instr->setDestRedefined();
408 if (Instr->isMultiDest()) {
409 // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
410 // fake-def for Instr.DestHi here.
411 assert(llvm::isa<Variable64On32>(Dest));
412 Context.insert<InstFakeDef>(Instr->getDestHi());
416 // --------------------------------------------------------------------------
417 // Begin bool folding machinery.
419 // There are three types of boolean lowerings handled by this target:
421 // 1) Boolean expressions leading to a boolean Variable definition
422 // ---------------------------------------------------------------
424 // Whenever a i1 Variable is live out (i.e., its live range extends beyond
425 // the defining basic block) we do not fold the operation. We instead
426 // materialize (i.e., compute) the variable normally, so that it can be used
427 // when needed. We also materialize i1 values that are not single use to
428 // avoid code duplication. These expressions are not short circuited.
430 // 2) Boolean expressions leading to a select
431 // ------------------------------------------
433 // These include boolean chains leading to a select instruction, as well as
434 // i1 Sexts. These boolean expressions are lowered to:
436 // mov T, <false value>
437 // CC <- eval(Boolean Expression)
438 // movCC T, <true value>
440 // For Sexts, <false value> is 0, and <true value> is -1.
442 // 3) Boolean expressions leading to a br i1
443 // -----------------------------------------
445 // These are the boolean chains leading to a branch. These chains are
446 // short-circuited, i.e.:
449 // br i1 A, label %T, label %F
462 // br i1 A, label %T, label %F
472 // Arbitrarily long chains are short circuited, e.g
478 // br i1 I, label %True, label %False
494 // beq %False (bne %True)
496 /// lowerInt1 materializes Boolean to a Variable.
497 SafeBoolChain lowerInt1(Variable *Dest, Operand *Boolean);
499 /// lowerInt1ForSelect generates the following instruction sequence:
501 /// mov T, FalseValue
502 /// CC <- eval(Boolean)
503 /// movCC T, TrueValue
506 /// It is used for lowering select i1, as well as i1 Sext.
507 void lowerInt1ForSelect(Variable *Dest, Operand *Boolean, Operand *TrueValue,
508 Operand *FalseValue);
510 /// LowerInt1BranchTarget is used by lowerIntForBranch. It wraps a CfgNode, or
511 /// an InstARM32Label (but never both) so that, during br i1 lowering, we can
512 /// create auxiliary labels for short circuiting the condition evaluation.
513 class LowerInt1BranchTarget {
515 explicit LowerInt1BranchTarget(CfgNode *const Target)
516 : NodeTarget(Target) {}
517 explicit LowerInt1BranchTarget(InstARM32Label *const Target)
518 : LabelTarget(Target) {}
520 /// createForLabelOrDuplicate will return a new LowerInt1BranchTarget that
521 /// is the exact copy of this if Label is nullptr; otherwise, the returned
522 /// object will wrap Label instead.
523 LowerInt1BranchTarget
524 createForLabelOrDuplicate(InstARM32Label *Label) const {
525 if (Label != nullptr)
526 return LowerInt1BranchTarget(Label);
528 return LowerInt1BranchTarget(NodeTarget);
529 return LowerInt1BranchTarget(LabelTarget);
532 CfgNode *const NodeTarget = nullptr;
533 InstARM32Label *const LabelTarget = nullptr;
536 /// LowerInt1AllowShortCircuit is a helper type used by lowerInt1ForBranch for
537 /// determining which type arithmetic is allowed to be short circuited. This
538 /// is useful for lowering
541 /// t2 = and i1 t1, C
542 /// br i1 t2, label %False, label %True
554 /// Without this information, short circuiting would only allow to short
555 /// circuit a single high level instruction. For example:
558 /// t2 = and i1 t1, C
559 /// br i1 t2, label %False, label %True
561 /// cannot be lowered to
571 /// It needs to be lowered to
582 /// TODO(jpp): evaluate if this kind of short circuiting hurts performance (it
584 enum LowerInt1AllowShortCircuit {
587 SC_All = SC_And | SC_Or,
590 /// ShortCircuitCondAndLabel wraps the condition codes that should be used
591 /// after a lowerInt1ForBranch returns to branch to the
592 /// TrueTarget/FalseTarget. If ShortCircuitLabel is not nullptr, then the
593 /// called lowerInt1forBranch created an internal (i.e., short-circuit) label
594 /// used for short circuiting.
595 class ShortCircuitCondAndLabel {
597 explicit ShortCircuitCondAndLabel(CondWhenTrue &&C,
598 InstARM32Label *L = nullptr)
599 : Cond(std::move(C)), ShortCircuitTarget(L) {}
600 const CondWhenTrue Cond;
601 InstARM32Label *const ShortCircuitTarget;
603 CondWhenTrue assertNoLabelAndReturnCond() const {
604 assert(ShortCircuitTarget == nullptr);
609 /// lowerInt1ForBranch expands Boolean, and returns the condition codes that
610 /// are to be used for branching to the branch's TrueTarget. It may return a
611 /// label that the expansion of Boolean used to short circuit the chain's
613 ShortCircuitCondAndLabel
614 lowerInt1ForBranch(Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
615 const LowerInt1BranchTarget &TargetFalse,
616 uint32_t ShortCircuitable);
618 // _br is a convenience wrapper that emits br instructions to Target.
619 void _br(const LowerInt1BranchTarget &BrTarget,
620 CondARM32::Cond Cond = CondARM32::AL) {
621 assert((BrTarget.NodeTarget == nullptr) !=
622 (BrTarget.LabelTarget == nullptr));
623 if (BrTarget.NodeTarget != nullptr)
624 _br(BrTarget.NodeTarget, Cond);
626 _br(BrTarget.LabelTarget, Cond);
629 // _br_short_circuit is used when lowering InstArithmetic::And and
630 // InstArithmetic::Or and a short circuit branch is needed.
631 void _br_short_circuit(const LowerInt1BranchTarget &Target,
632 const CondWhenTrue &Cond) {
633 if (Cond.WhenTrue1 != CondARM32::kNone) {
634 _br(Target, Cond.WhenTrue1);
636 if (Cond.WhenTrue0 != CondARM32::kNone) {
637 _br(Target, Cond.WhenTrue0);
640 // End of bool folding machinery
641 // --------------------------------------------------------------------------
643 /// The Operand can only be a 16-bit immediate or a ConstantRelocatable (with
644 /// an upper16 relocation).
645 void _movt(Variable *Dest, Operand *Src0,
646 CondARM32::Cond Pred = CondARM32::AL) {
647 Context.insert<InstARM32Movt>(Dest, Src0, Pred);
649 void _movw(Variable *Dest, Operand *Src0,
650 CondARM32::Cond Pred = CondARM32::AL) {
651 Context.insert<InstARM32Movw>(Dest, Src0, Pred);
653 void _mul(Variable *Dest, Variable *Src0, Variable *Src1,
654 CondARM32::Cond Pred = CondARM32::AL) {
655 Context.insert<InstARM32Mul>(Dest, Src0, Src1, Pred);
657 void _mvn(Variable *Dest, Operand *Src0,
658 CondARM32::Cond Pred = CondARM32::AL) {
659 Context.insert<InstARM32Mvn>(Dest, Src0, Pred);
661 void _orr(Variable *Dest, Variable *Src0, Operand *Src1,
662 CondARM32::Cond Pred = CondARM32::AL) {
663 Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred);
665 void _orrs(Variable *Dest, Variable *Src0, Operand *Src1,
666 CondARM32::Cond Pred = CondARM32::AL) {
667 constexpr bool SetFlags = true;
668 Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred, SetFlags);
670 void _push(const VarList &Sources) { Context.insert<InstARM32Push>(Sources); }
671 void _pop(const VarList &Dests) {
672 Context.insert<InstARM32Pop>(Dests);
673 // Mark dests as modified.
674 for (Variable *Dest : Dests)
675 Context.insert<InstFakeDef>(Dest);
677 void _rbit(Variable *Dest, Variable *Src0,
678 CondARM32::Cond Pred = CondARM32::AL) {
679 Context.insert<InstARM32Rbit>(Dest, Src0, Pred);
681 void _rev(Variable *Dest, Variable *Src0,
682 CondARM32::Cond Pred = CondARM32::AL) {
683 Context.insert<InstARM32Rev>(Dest, Src0, Pred);
685 void _ret(Variable *LR, Variable *Src0 = nullptr) {
686 Context.insert<InstARM32Ret>(LR, Src0);
688 void _rscs(Variable *Dest, Variable *Src0, Operand *Src1,
689 CondARM32::Cond Pred = CondARM32::AL) {
690 constexpr bool SetFlags = true;
691 Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred, SetFlags);
693 void _rsc(Variable *Dest, Variable *Src0, Operand *Src1,
694 CondARM32::Cond Pred = CondARM32::AL) {
695 Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred);
697 void _rsbs(Variable *Dest, Variable *Src0, Operand *Src1,
698 CondARM32::Cond Pred = CondARM32::AL) {
699 constexpr bool SetFlags = true;
700 Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred, SetFlags);
702 void _rsb(Variable *Dest, Variable *Src0, Operand *Src1,
703 CondARM32::Cond Pred = CondARM32::AL) {
704 Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred);
706 void _sbc(Variable *Dest, Variable *Src0, Operand *Src1,
707 CondARM32::Cond Pred = CondARM32::AL) {
708 Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred);
710 void _sbcs(Variable *Dest, Variable *Src0, Operand *Src1,
711 CondARM32::Cond Pred = CondARM32::AL) {
712 constexpr bool SetFlags = true;
713 Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred, SetFlags);
715 void _sdiv(Variable *Dest, Variable *Src0, Variable *Src1,
716 CondARM32::Cond Pred = CondARM32::AL) {
717 Context.insert<InstARM32Sdiv>(Dest, Src0, Src1, Pred);
719 /// _str, for all your Variable to memory transfers. Addr has the same
720 /// restrictions that it does in _ldr.
721 void _str(Variable *Value, OperandARM32Mem *Addr,
722 CondARM32::Cond Pred = CondARM32::AL) {
723 Context.insert<InstARM32Str>(Value, Addr, Pred);
725 void _strex(Variable *Dest, Variable *Value, OperandARM32Mem *Addr,
726 CondARM32::Cond Pred = CondARM32::AL) {
727 // strex requires Dest to be a register other than Value or Addr. This
728 // restriction is cleanly represented by adding an "early" definition of
729 // Dest (or a latter use of all the sources.)
730 Context.insert<InstFakeDef>(Dest);
731 if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) {
732 Context.insert<InstFakeUse>(Value64->getLo());
733 Context.insert<InstFakeUse>(Value64->getHi());
735 auto *Instr = Context.insert<InstARM32Strex>(Dest, Value, Addr, Pred);
736 Instr->setDestRedefined();
738 void _sub(Variable *Dest, Variable *Src0, Operand *Src1,
739 CondARM32::Cond Pred = CondARM32::AL) {
740 Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred);
742 void _subs(Variable *Dest, Variable *Src0, Operand *Src1,
743 CondARM32::Cond Pred = CondARM32::AL) {
744 constexpr bool SetFlags = true;
745 Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred, SetFlags);
747 void _sxt(Variable *Dest, Variable *Src0,
748 CondARM32::Cond Pred = CondARM32::AL) {
749 Context.insert<InstARM32Sxt>(Dest, Src0, Pred);
751 void _tst(Variable *Src0, Operand *Src1,
752 CondARM32::Cond Pred = CondARM32::AL) {
753 Context.insert<InstARM32Tst>(Src0, Src1, Pred);
755 void _trap() { Context.insert<InstARM32Trap>(); }
756 void _udiv(Variable *Dest, Variable *Src0, Variable *Src1,
757 CondARM32::Cond Pred = CondARM32::AL) {
758 Context.insert<InstARM32Udiv>(Dest, Src0, Src1, Pred);
760 void _umull(Variable *DestLo, Variable *DestHi, Variable *Src0,
761 Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) {
762 Context.insert<InstARM32Umull>(DestLo, DestHi, Src0, Src1, Pred);
763 // Model the modification to the second dest as a fake def. Note that the
764 // def is not predicated.
765 Context.insert<InstFakeDef>(DestHi, DestLo);
767 void _uxt(Variable *Dest, Variable *Src0,
768 CondARM32::Cond Pred = CondARM32::AL) {
769 Context.insert<InstARM32Uxt>(Dest, Src0, Pred);
771 void _vabs(Variable *Dest, Variable *Src,
772 CondARM32::Cond Pred = CondARM32::AL) {
773 Context.insert<InstARM32Vabs>(Dest, Src, Pred);
775 void _vadd(Variable *Dest, Variable *Src0, Variable *Src1) {
776 Context.insert<InstARM32Vadd>(Dest, Src0, Src1);
778 void _vand(Variable *Dest, Variable *Src0, Variable *Src1) {
779 Context.insert<InstARM32Vand>(Dest, Src0, Src1);
781 void _vcvt(Variable *Dest, Variable *Src, InstARM32Vcvt::VcvtVariant Variant,
782 CondARM32::Cond Pred = CondARM32::AL) {
783 Context.insert<InstARM32Vcvt>(Dest, Src, Variant, Pred);
785 void _vdiv(Variable *Dest, Variable *Src0, Variable *Src1) {
786 Context.insert<InstARM32Vdiv>(Dest, Src0, Src1);
788 void _vcmp(Variable *Src0, Variable *Src1,
789 CondARM32::Cond Pred = CondARM32::AL) {
790 Context.insert<InstARM32Vcmp>(Src0, Src1, Pred);
792 void _vcmp(Variable *Src0, OperandARM32FlexFpZero *FpZero,
793 CondARM32::Cond Pred = CondARM32::AL) {
794 Context.insert<InstARM32Vcmp>(Src0, FpZero, Pred);
796 void _veor(Variable *Dest, Variable *Src0, Variable *Src1) {
797 Context.insert<InstARM32Veor>(Dest, Src0, Src1);
799 void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
800 Context.insert<InstARM32Vmrs>(Pred);
802 void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) {
803 Context.insert<InstARM32Vmla>(Dest, Src0, Src1);
805 void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) {
806 Context.insert<InstARM32Vmls>(Dest, Src0, Src1);
808 void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
809 Context.insert<InstARM32Vmul>(Dest, Src0, Src1);
811 void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) {
812 Context.insert<InstARM32Vorr>(Dest, Src0, Src1);
814 void _vsqrt(Variable *Dest, Variable *Src,
815 CondARM32::Cond Pred = CondARM32::AL) {
816 Context.insert<InstARM32Vsqrt>(Dest, Src, Pred);
818 void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) {
819 Context.insert<InstARM32Vsub>(Dest, Src0, Src1);
822 // Iterates over the CFG and determines the maximum outgoing stack arguments
823 // bytes. This information is later used during addProlog() to pre-allocate
825 // TODO(jpp): This could live in the Parser, if we provided a Target-specific
826 // method that the Parser could call.
827 void findMaxStackOutArgsSize();
829 /// Returns true if the given Offset can be represented in a Load/Store Mem
831 bool isLegalMemOffset(Type Ty, int32_t Offset) const;
833 void postLowerLegalization();
835 /// Sandboxer defines methods for ensuring that "dangerous" operations are
836 /// masked during sandboxed code emission. For regular, non-sandboxed code
837 /// emission, its methods are simple pass-through methods.
839 /// The Sandboxer also emits BundleLock/BundleUnlock pseudo-instructions
840 /// in the constructor/destructor during sandboxed code emission. Therefore,
841 /// it is a bad idea to create an object of this type and "keep it around."
842 /// The recommended usage is:
844 /// AutoSandboxing(this).<<operation>>(...);
846 /// This usage ensures that no other instructions are inadvertently added to
849 Sandboxer() = delete;
850 Sandboxer(const Sandboxer &) = delete;
851 Sandboxer &operator=(const Sandboxer &) = delete;
856 InstBundleLock::Option BundleOption = InstBundleLock::Opt_None);
861 /// add sp, sp, AddAmount
862 /// bic sp, sp, 0xc0000000
864 /// (for the rationale, see the ARM 32-bit Sandbox Specification.)
865 void add_sp(Operand *AddAmount);
867 /// Emits code to align sp to the specified alignment:
869 /// bic/and sp, sp, Alignment
870 /// bic, sp, sp, 0xc0000000
871 void align_sp(size_t Alignment);
873 /// Emits a call instruction. If CallTarget is a Variable, it emits
875 /// bic CallTarget, CallTarget, 0xc000000f
878 /// Otherwise, it emits
882 /// Note: in sandboxed code calls are always emitted at addresses 12 mod 16.
883 InstARM32Call *bl(Variable *ReturnReg, Operand *CallTarget);
887 /// bic rBase, rBase, 0xc0000000
888 /// ldr rDest, [rBase, #Offset]
890 /// Exception: if rBase is r9 or sp, then the load is emitted as:
892 /// ldr rDest, [rBase, #Offset]
894 /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
896 void ldr(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred);
898 /// Emits a load exclusive:
900 /// bic rBase, rBase, 0xc0000000
901 /// ldrex rDest, [rBase]
903 /// Exception: if rBase is r9 or sp, then the load is emitted as:
905 /// ldrex rDest, [rBase]
907 /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
909 void ldrex(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred);
911 /// Resets sp to Src:
914 /// bic sp, sp, 0xc0000000
915 void reset_sp(Variable *Src);
917 /// Emits code to return from a function:
919 /// bic lr, lr, 0xc000000f
921 void ret(Variable *RetAddr, Variable *RetValue);
925 /// bic rBase, rBase, 0xc0000000
926 /// str rSrc, [rBase, #Offset]
928 /// Exception: if rBase is r9 or sp, then the store is emitted as:
930 /// str rSrc, [rBase, #Offset]
932 /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
934 void str(Variable *Src, OperandARM32Mem *Mem, CondARM32::Cond Pred);
936 /// Emits a store exclusive:
938 /// bic rBase, rBase, 0xc0000000
939 /// strex rDest, rSrc, [rBase]
941 /// Exception: if rBase is r9 or sp, then the store is emitted as:
943 /// strex rDest, rSrc, [rBase]
945 /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
947 void strex(Variable *Dest, Variable *Src, OperandARM32Mem *Mem,
948 CondARM32::Cond Pred);
952 /// sub sp, sp, SubAmount
953 /// bic sp, sp, 0xc0000000
954 void sub_sp(Operand *SubAmount);
961 class PostLoweringLegalizer {
962 PostLoweringLegalizer() = delete;
963 PostLoweringLegalizer(const PostLoweringLegalizer &) = delete;
964 PostLoweringLegalizer &operator=(const PostLoweringLegalizer &) = delete;
967 explicit PostLoweringLegalizer(TargetARM32 *Target)
968 : Target(Target), StackOrFrameReg(Target->getPhysicalRegister(
969 Target->getFrameOrStackReg())) {}
971 void resetTempBaseIfClobberedBy(const Inst *Instr);
973 // Ensures that the TempBase register held by this legalizer (if any) is
975 void assertNoTempOrAssignedToIP() const {
976 assert(TempBaseReg == nullptr ||
977 TempBaseReg->getRegNum() == Target->getReservedTmpReg());
980 // Legalizes Mem. If Mem.Base is a Rematerializable variable, Mem.Offset is
982 OperandARM32Mem *legalizeMemOperand(OperandARM32Mem *Mem,
983 bool AllowOffsets = true);
985 /// Legalizes Mov if its Source (or Destination) is a spilled Variable, or
986 /// if its Source is a Rematerializable variable (this form is used in lieu
987 /// of lea, which is not available in ARM.)
989 /// Moves to memory become store instructions, and moves from memory, loads.
990 void legalizeMov(InstARM32Mov *Mov);
993 /// Creates a new Base register centered around [Base, +/- Offset].
994 Variable *newBaseRegister(Variable *Base, int32_t Offset,
995 int32_t ScratchRegNum);
997 /// Creates a new, legal OperandARM32Mem for accessing Base + Offset.
998 /// The returned mem operand is a legal operand for accessing memory that is
1001 /// If [Base, #Offset] is encodable, then the method returns a Mem operand
1002 /// expressing it. Otherwise,
1004 /// if [TempBaseReg, #Offset-TempBaseOffset] is a valid memory operand, the
1005 /// method will return that. Otherwise,
1007 /// a new base register ip=Base+Offset is created, and the method returns a
1008 /// memory operand expressing [ip, #0].
1009 OperandARM32Mem *createMemOperand(Type Ty, Variable *Base, int32_t Offset,
1010 bool AllowOffsets = true);
1011 TargetARM32 *const Target;
1012 Variable *const StackOrFrameReg;
1013 Variable *TempBaseReg = nullptr;
1014 int32_t TempBaseOffset = 0;
1017 const bool NeedSandboxing;
1018 TargetARM32Features CPUFeatures;
1019 bool UsesFramePointer = false;
1020 bool NeedsStackAlignment = false;
1021 bool MaybeLeafFunc = true;
1022 size_t SpillAreaSizeBytes = 0;
1023 size_t FixedAllocaSizeBytes = 0;
1024 size_t FixedAllocaAlignBytes = 0;
1025 bool PrologEmitsFixedAllocas = false;
1026 uint32_t MaxOutArgsSizeBytes = 0;
1027 // TODO(jpp): std::array instead of array.
1028 static llvm::SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM];
1029 static llvm::SmallBitVector
1030 TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
1031 static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
1032 llvm::SmallBitVector RegsUsed;
1033 VarList PhysicalRegisters[IceType_NUM];
1034 VarList PreservedGPRs;
1035 VarList PreservedSRegs;
1037 /// Helper class that understands the Calling Convention and register
1038 /// assignments. The first few integer type parameters can use r0-r3,
1039 /// regardless of their position relative to the floating-point/vector
1040 /// arguments in the argument list. Floating-point and vector arguments
1041 /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic,
1042 /// see the ARM Architecture Procedure Calling Standards (AAPCS).
1044 /// Technically, arguments that can start with registers but extend beyond the
1045 /// available registers can be split between the registers and the stack.
1046 /// However, this is typically for passing GPR structs by value, and PNaCl
1047 /// transforms expand this out.
1049 /// At (public) function entry, the stack must be 8-byte aligned.
1051 CallingConv(const CallingConv &) = delete;
1052 CallingConv &operator=(const CallingConv &) = delete;
1056 ~CallingConv() = default;
1058 /// argInGPR returns true if there is a GPR available for the requested
1059 /// type, and false otherwise. If it returns true, Reg is set to the
1060 /// appropriate register number. Note that, when Ty == IceType_i64, Reg will
1061 /// be an I64 register pair.
1062 bool argInGPR(Type Ty, int32_t *Reg);
1064 /// argInVFP is to floating-point/vector types what argInGPR is for integer
1066 bool argInVFP(Type Ty, int32_t *Reg);
1069 void discardUnavailableGPRsAndTheirAliases(CfgVector<SizeT> *Regs);
1070 llvm::SmallBitVector GPRegsUsed;
1071 CfgVector<SizeT> GPRArgs;
1072 CfgVector<SizeT> I64Args;
1074 void discardUnavailableVFPRegs(CfgVector<SizeT> *Regs);
1075 llvm::SmallBitVector VFPRegsUsed;
1076 CfgVector<SizeT> FP32Args;
1077 CfgVector<SizeT> FP64Args;
1078 CfgVector<SizeT> Vec128Args;
1084 OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt,
1087 void postambleCtpop64(const InstCall *Instr);
1088 void preambleDivRem(const InstCall *Instr);
1089 std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Inst)>
1090 ARM32HelpersPreamble;
1091 std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Inst)>
1092 ARM32HelpersPostamble;
1094 class ComputationTracker {
1096 ComputationTracker() = default;
1097 ~ComputationTracker() = default;
1099 void forgetProducers() { KnownComputations.clear(); }
1100 void recordProducers(CfgNode *Node);
1102 const Inst *getProducerOf(const Operand *Opnd) const {
1103 auto *Var = llvm::dyn_cast<Variable>(Opnd);
1104 if (Var == nullptr) {
1108 auto Iter = KnownComputations.find(Var->getIndex());
1109 if (Iter == KnownComputations.end()) {
1113 return Iter->second.Instr;
1116 void dump(const Cfg *Func) const {
1117 if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
1119 OstreamLocker L(Func->getContext());
1120 Ostream &Str = Func->getContext()->getStrDump();
1121 Str << "foldable producer:\n";
1122 for (const auto &Computation : KnownComputations) {
1124 Computation.second.Instr->dump(Func);
1131 class ComputationEntry {
1133 ComputationEntry(Inst *I, Type Ty) : Instr(I), ComputationType(Ty) {}
1135 // Boolean folding is disabled for variables whose live range is multi
1136 // block. We conservatively initialize IsLiveOut to true, and set it to
1137 // false once we find the end of the live range for the variable defined
1138 // by this instruction. If liveness analysis is not performed (e.g., in
1139 // Om1 mode) IsLiveOut will never be set to false, and folding will be
1141 bool IsLiveOut = true;
1142 int32_t NumUses = 0;
1143 Type ComputationType;
1146 // ComputationMap maps a Variable number to a payload identifying which
1147 // instruction defined it.
1148 using ComputationMap = std::unordered_map<SizeT, ComputationEntry>;
1149 ComputationMap KnownComputations;
1152 ComputationTracker Computations;
1154 // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked
1155 // without specifying a physical register. This is needed for creating unbound
1156 // temporaries during Ice -> ARM lowering, but before register allocation.
1157 // This is a safe-guard that no unbound temporaries are created during the
1158 // legalization post-passes.
1159 bool AllowTemporaryWithNoReg = true;
1160 // ForbidTemporaryWithoutReg is a RAII class that manages
1161 // AllowTemporaryWithNoReg.
1162 class ForbidTemporaryWithoutReg {
1163 ForbidTemporaryWithoutReg() = delete;
1164 ForbidTemporaryWithoutReg(const ForbidTemporaryWithoutReg &) = delete;
1165 ForbidTemporaryWithoutReg &
1166 operator=(const ForbidTemporaryWithoutReg &) = delete;
1169 explicit ForbidTemporaryWithoutReg(TargetARM32 *Target) : Target(Target) {
1170 Target->AllowTemporaryWithNoReg = false;
1172 ~ForbidTemporaryWithoutReg() { Target->AllowTemporaryWithNoReg = true; }
1175 TargetARM32 *const Target;
1179 class TargetDataARM32 final : public TargetDataLowering {
1180 TargetDataARM32() = delete;
1181 TargetDataARM32(const TargetDataARM32 &) = delete;
1182 TargetDataARM32 &operator=(const TargetDataARM32 &) = delete;
1185 static std::unique_ptr<TargetDataLowering> create(GlobalContext *Ctx) {
1186 return std::unique_ptr<TargetDataLowering>(new TargetDataARM32(Ctx));
1189 void lowerGlobals(const VariableDeclarationList &Vars,
1190 const IceString &SectionSuffix) override;
1191 void lowerConstants() override;
1192 void lowerJumpTables() override;
1195 explicit TargetDataARM32(GlobalContext *Ctx);
1198 ~TargetDataARM32() override = default;
1201 class TargetHeaderARM32 final : public TargetHeaderLowering {
1202 TargetHeaderARM32() = delete;
1203 TargetHeaderARM32(const TargetHeaderARM32 &) = delete;
1204 TargetHeaderARM32 &operator=(const TargetHeaderARM32 &) = delete;
1207 static std::unique_ptr<TargetHeaderLowering> create(GlobalContext *Ctx) {
1208 return std::unique_ptr<TargetHeaderLowering>(new TargetHeaderARM32(Ctx));
1211 void lower() override;
1214 explicit TargetHeaderARM32(GlobalContext *Ctx);
1217 ~TargetHeaderARM32() = default;
1219 TargetARM32Features CPUFeatures;
1222 } // end of namespace ARM32
1223 } // end of namespace Ice
1225 #endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H