//===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the TargetLoweringARM32 class, which consists almost
/// entirely of the lowering sequence for each high-level instruction.
///
//===----------------------------------------------------------------------===//
#include "IceTargetLoweringARM32.h"

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceInstARM32.def"
#include "IceInstARM32.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IcePhiLoweringImpl.h"
#include "IceRegistersARM32.h"
#include "IceTargetLoweringARM32.def"
#include "IceUtils.h"

#include "llvm/Support/MathExtras.h"

namespace Ice {

namespace {
// The following table summarizes the logic for lowering the icmp instruction
// for i32 and narrower types. Each icmp condition has a clear mapping to an
// ARM32 conditional move instruction.

const struct TableIcmp32_ {
  CondARM32::Cond Mapping;
} TableIcmp32[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64)                       \
  { CondARM32::C_32 },
    ICMPARM32_TABLE
#undef X
};

// The following table summarizes the logic for lowering the icmp instruction
// for the i64 type. Two conditional moves are needed for setting to 1 or 0.
// The operands may need to be swapped, and there is a slight difference for
// signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
const struct TableIcmp64_ {
  bool IsSigned;
  bool Swapped;
  CondARM32::Cond C1, C2;
} TableIcmp64[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64)                       \
  { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 },
    ICMPARM32_TABLE
#undef X
};

CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
  size_t Index = static_cast<size_t>(Cond);
  assert(Index < llvm::array_lengthof(TableIcmp32));
  return TableIcmp32[Index].Mapping;
}
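
// For example (illustrative), getIcmp32Mapping(InstIcmp::Slt) yields
// CondARM32::LT, so an i32 "icmp slt" can be lowered as a cmp followed by
// conditional moves (e.g. a movlt/movge pair) that materialize the i1 result.
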
// In some cases, there are x-macros tables for both high-level and low-level
// instructions/operands that use the same enum key value. The tables are kept
// separate to maintain a proper separation between abstraction layers. There
// is a risk that the tables could get out of sync if enum values are reordered
// or if entries are added or deleted. The following anonymous namespaces use
// static_asserts to ensure everything is kept in sync.

// Validate the enum values in ICMPARM32_TABLE.
namespace {
// Define a temporary set of enum values based on low-level table entries.
enum _icmp_ll_enum {
#define X(val, signed, swapped64, C_32, C1_64, C2_64) _icmp_ll_##val,
  ICMPARM32_TABLE
#undef X
      _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static constexpr int _icmp_hl_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and ensure the
// table entry keys are consistent.
#define X(val, signed, swapped64, C_32, C1_64, C2_64)                          \
  static_assert(                                                               \
      _icmp_ll_##val == _icmp_hl_##val,                                        \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #val);
ICMPARM32_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table entries in
// case the high-level table has extra entries.
#define X(tag, str)                                                            \
  static_assert(                                                               \
      _icmp_hl_##tag == _icmp_ll_##tag,                                        \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #tag);
ICEINSTICMP_TABLE
#undef X
} // end of anonymous namespace

const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;

// Value is in bytes. Return Value adjusted to the next highest multiple of the
// stack alignment.
uint32_t applyStackAlignment(uint32_t Value) {
  return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
}

// Value is in bytes. Return Value adjusted to the next highest multiple of the
// stack alignment required for the given type.
uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
  // Use natural alignment, except that normally (non-NaCl) ARM only aligns
  // vectors to 8 bytes.
  // TODO(jvoung): Check this ...
  size_t typeAlignInBytes = typeWidthInBytes(Ty);
  if (isVectorType(Ty))
    typeAlignInBytes = 8;
  return Utils::applyAlignment(Value, typeAlignInBytes);
}
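
// Worked example (illustrative): with the 16-byte stack alignment above,
// applyStackAlignment(20) == 32 and applyStackAlignment(32) == 32, while
// applyStackAlignmentTy(6, IceType_i32) == 8 (the next multiple of i32's
// natural 4-byte alignment).
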
// Conservatively check if at compile time we know that the operand is
// definitely a non-zero integer.
bool isGuaranteedNonzeroInt(const Operand *Op) {
  if (auto *Const = llvm::dyn_cast_or_null<ConstantInteger32>(Op)) {
    return Const->getValue() != 0;
  }
  return false;
}

} // end of anonymous namespace

TargetARM32Features::TargetARM32Features(const ClFlags &Flags) {
  static_assert(
      (ARM32InstructionSet::End - ARM32InstructionSet::Begin) ==
          (TargetInstructionSet::ARM32InstructionSet_End -
           TargetInstructionSet::ARM32InstructionSet_Begin),
      "ARM32InstructionSet range different from TargetInstructionSet");
  if (Flags.getTargetInstructionSet() !=
      TargetInstructionSet::BaseInstructionSet) {
    InstructionSet = static_cast<ARM32InstructionSet>(
        (Flags.getTargetInstructionSet() -
         TargetInstructionSet::ARM32InstructionSet_Begin) +
        ARM32InstructionSet::Begin);
  }
}

TargetARM32::TargetARM32(Cfg *Func)
    : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) {
  // TODO: Don't initialize IntegerRegisters and friends every time. Instead,
  // initialize in some sort of static initializer for the class.
  // Limit this size (or do all bitsets need to be the same width)???
  llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector I64PairRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM);
  llvm::SmallBitVector Float64Registers(RegARM32::Reg_NUM);
  llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
  ScratchRegs.resize(RegARM32::Reg_NUM);
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,   \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
  IntegerRegisters[RegARM32::val] = isInt;                                     \
  I64PairRegisters[RegARM32::val] = isI64Pair;                                 \
  Float32Registers[RegARM32::val] = isFP32;                                    \
  Float64Registers[RegARM32::val] = isFP64;                                    \
  VectorRegisters[RegARM32::val] = isVec128;                                   \
  RegisterAliases[RegARM32::val].resize(RegARM32::Reg_NUM);                    \
  for (SizeT RegAlias : alias_init) {                                          \
    assert(!RegisterAliases[RegARM32::val][RegAlias] &&                        \
           "Duplicate alias for " #val);                                       \
    RegisterAliases[RegARM32::val].set(RegAlias);                              \
  }                                                                            \
  assert(RegisterAliases[RegARM32::val][RegARM32::val]);                       \
  ScratchRegs[RegARM32::val] = scratch;
  REGARM32_TABLE;
#undef X
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegisters;
  TypeToRegisterSet[IceType_i8] = IntegerRegisters;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = I64PairRegisters;
  TypeToRegisterSet[IceType_f32] = Float32Registers;
  TypeToRegisterSet[IceType_f64] = Float64Registers;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
}

namespace {

void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) {
  for (Variable *Var : Vars) {
    auto *Var64 = llvm::dyn_cast<Variable64On32>(Var);
    if (!Var64) {
      // This is not the variable we are looking for.
      continue;
    }
    assert(Var64->hasReg() || !Var64->mustHaveReg());
    if (!Var64->hasReg()) {
      continue;
    }
    SizeT FirstReg = RegARM32::getI64PairFirstGPRNum(Var->getRegNum());
    // This assumes little endian.
    Variable *Lo = Var64->getLo();
    Variable *Hi = Var64->getHi();
    assert(Lo->hasReg() == Hi->hasReg());
    if (Lo->hasReg()) {
      continue;
    }
    Lo->setRegNum(FirstReg);
    Lo->setMustHaveReg();
    Hi->setRegNum(FirstReg + 1);
    Hi->setMustHaveReg();
  }
}
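
// For example (illustrative), if the register allocator assigned the r0/r1
// pair to an infinite-weight i64 variable, the loop above pins its Lo half to
// r0 and its Hi half to r1 (little endian: Lo gets the first GPR of the pair).
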
} // end of anonymous namespace

void TargetARM32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with X86?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094

  if (!Ctx->getFlags().getPhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts of the
  // lowering decisions, such as compare/branch fusing. If non-lightweight
  // liveness analysis is used, the instructions need to be renumbered first.
  // TODO: This renumbering should only be necessary if we're actually
  // calculating live intervals, which we only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness calculation,
  // i.e. livenessLightweight(). However, for some reason that slows down the
  // rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After ARM32 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After ARM32 codegen");

  // Register allocation. This requires instruction renumbering and full
  // liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // Validate the live range computations. The expensive validation call is
  // deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  // The post-codegen dump is done here, after liveness analysis and associated
  // cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial ARM32 codegen");
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
  Func->dump("After linear scan regalloc");

  if (Ctx->getFlags().getPhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  legalizeStackSlots();
  if (Func->hasError())
    return;
  Func->dump("After legalizeStackSlots");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code emission. In
  // particular, no transformations that insert or reorder CfgNodes should be
  // done after branch optimization. We go ahead and do it before nop insertion
  // to reduce the amount of work needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

void TargetARM32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO: share passes with X86?

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial ARM32 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  legalizeStackSlots();
  if (Func->hasError())
    return;
  Func->dump("After legalizeStackSlots");

  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
  if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) {
    return Br->optimizeBranch(NextNode);
  }
  return false;
}

IceString TargetARM32::getRegName(SizeT RegNum, Type Ty) const {
  assert(RegNum < RegARM32::Reg_NUM);
  (void)Ty;
  static const char *RegNames[] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,   \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
  name,
      REGARM32_TABLE
#undef X
  };
  return RegNames[RegNum];
}

Variable *TargetARM32::getPhysicalRegister(SizeT RegNum, Type Ty) {
  static const Type DefaultType[] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,   \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
  (isFP32)                                                                     \
      ? IceType_f32                                                            \
      : ((isFP64) ? IceType_f64 : ((isVec128 ? IceType_v4i32 : IceType_i32))),
      REGARM32_TABLE
#undef X
  };

  assert(RegNum < RegARM32::Reg_NUM);
  if (Ty == IceType_void) {
    assert(RegNum < llvm::array_lengthof(DefaultType));
    Ty = DefaultType[RegNum];
  }
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM);
  assert(RegNum < PhysicalRegisters[Ty].size());
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark a named physical register as an "argument" so that it is
    // considered live upon function entry. Otherwise it's possible to get
    // liveness validation errors for saving callee-save registers.
    Func->addImplicitArg(Reg);
    // Don't bother tracking the live range of a named physical register.
    Reg->setIgnoreLiveness();
  }
  return Reg;
}

void TargetARM32::emitJumpTable(const Cfg *Func,
                                const InstJumpTable *JumpTable) const {
  (void)JumpTable;
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::emitVariable(const Variable *Var) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  if (Var->hasReg()) {
    Str << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->mustHaveReg()) {
    llvm::report_fatal_error(
        "Infinite-weight Variable has no register assigned");
  }
  int32_t Offset = Var->getStackOffset();
  int32_t BaseRegNum = Var->getBaseRegNum();
  if (BaseRegNum == Variable::NoRegister) {
    BaseRegNum = getFrameOrStackReg();
    if (!hasFramePointer())
      Offset += getStackAdjustment();
  }
  const Type VarTy = Var->getType();
  if (!isLegalVariableStackOffset(VarTy, Offset)) {
    llvm::report_fatal_error("Illegal stack offset");
  }
  Str << "[" << getRegName(BaseRegNum, VarTy);
  if (Offset != 0) {
    Str << ", " << getConstantPrefix() << Offset;
  }
  Str << "]";
}
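
// For example (illustrative), a spilled i32 at frame offset -8 with a frame
// pointer in use is emitted as "[fp, #-8]", while a register-allocated
// variable is emitted as just its register name, e.g. "r4".
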
bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) {
  if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
    return false;
  int32_t RegLo, RegHi;
  // Always start i64 registers at an even register, so this may end up padding
  // away a register.
  NumGPRRegsUsed = Utils::applyAlignment(NumGPRRegsUsed, 2);
  RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  // If this bumps us past the boundary, don't allocate to a register and leave
  // any previously speculatively consumed registers as consumed.
  if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG)
    return false;
  Regs->first = RegLo;
  Regs->second = RegHi;
  return true;
}

bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) {
  if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
    return false;
  *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed;
  ++NumGPRRegsUsed;
  return true;
}

bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) {
  if (!VFPRegsFree.any()) {
    return false;
  }

  if (isVectorType(Ty)) {
    // Q registers are declared in reverse order, so RegARM32::Reg_q0 >
    // RegARM32::Reg_q1. Therefore, we need to subtract QRegStart from Reg_q0.
    // Same thing goes for D registers.
    static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1,
                  "ARM32 Q registers are possibly declared incorrectly.");

    int32_t QRegStart = (VFPRegsFree & ValidV128Regs).find_first();
    if (QRegStart >= 0) {
      VFPRegsFree.reset(QRegStart, QRegStart + 4);
      *Reg = RegARM32::Reg_q0 - (QRegStart / 4);
      return true;
    }
  } else if (Ty == IceType_f64) {
    static_assert(RegARM32::Reg_d0 > RegARM32::Reg_d1,
                  "ARM32 D registers are possibly declared incorrectly.");

    int32_t DRegStart = (VFPRegsFree & ValidF64Regs).find_first();
    if (DRegStart >= 0) {
      VFPRegsFree.reset(DRegStart, DRegStart + 2);
      *Reg = RegARM32::Reg_d0 - (DRegStart / 2);
      return true;
    }
  } else {
    static_assert(RegARM32::Reg_s0 < RegARM32::Reg_s1,
                  "ARM32 S registers are possibly declared incorrectly.");

    assert(Ty == IceType_f32);
    int32_t SReg = VFPRegsFree.find_first();
    assert(SReg >= 0);
    VFPRegsFree.reset(SReg);
    *Reg = RegARM32::Reg_s0 + SReg;
    return true;
  }

  // Parameter allocation failed. From now on, every fp register must be placed
  // on the stack. We clear VFPRegsFree in case there are any "holes" from S
  // and D registers.
  VFPRegsFree.reset();
  return false;
}
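
// Note on the scheme above: per the ARM AAPCS VFP variant, an f64 consumes an
// aligned pair of 32-bit S-register slots and a v128 consumes an aligned group
// of four, while f32 arguments can back-fill any single free slot. For example
// (illustrative), the argument sequence (f32, f64, f32) lands in s0, d1
// (i.e. s2/s3), and s1.
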
void TargetARM32::lowerArguments() {
  VarList &Args = Func->getArgs();
  TargetARM32::CallingConv CC;

  // For each register argument, replace Arg in the argument list with the home
  // register. Then generate an instruction in the prolog to copy the home
  // register to the assigned location of Arg.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT I = 0, E = Args.size(); I < E; ++I) {
    Variable *Arg = Args[I];
    Type Ty = Arg->getType();
    if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> RegPair;
      if (!CC.I64InRegs(&RegPair))
        continue;
      Variable *RegisterArg = Func->makeVariable(Ty);
      auto *RegisterArg64On32 = llvm::cast<Variable64On32>(RegisterArg);
      if (BuildDefs::dump())
        RegisterArg64On32->setName(Func, "home_reg:" + Arg->getName(Func));
      RegisterArg64On32->initHiLo(Func);
      RegisterArg64On32->setIsArg();
      RegisterArg64On32->getLo()->setRegNum(RegPair.first);
      RegisterArg64On32->getHi()->setRegNum(RegPair.second);
      Arg->setIsArg(false);

      Args[I] = RegisterArg64On32;
      Context.insert(InstAssign::create(Func, Arg, RegisterArg));
      continue;
    }
    int32_t RegNum;
    if (isVectorType(Ty) || isFloatingType(Ty)) {
      if (!CC.FPInReg(Ty, &RegNum))
        continue;
    } else {
      assert(Ty == IceType_i32);
      if (!CC.I32InReg(&RegNum))
        continue;
    }
    Variable *RegisterArg = Func->makeVariable(Ty);
    if (BuildDefs::dump()) {
      RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
    }
    RegisterArg->setRegNum(RegNum);
    RegisterArg->setIsArg();
    Arg->setIsArg(false);

    Args[I] = RegisterArg;
    Context.insert(InstAssign::create(Func, Arg, RegisterArg));
  }
}
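
// For example (illustrative), for a function taking (i32 a, i64 b), a is
// assigned home register r0; b skips r1 (an i64 must start at an even
// register) and takes the r2/r3 pair, so r1 goes unused for argument passing.
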
// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the frame
// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
// I64 arg that has been split into Lo and Hi components, it calls itself
// recursively on the components, taking care to handle Lo first because of the
// little-endian architecture. Lastly, this function generates an instruction
// to copy Arg into its assigned register if applicable.
void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                                         size_t BasicFrameOffset,
                                         size_t &InArgsSizeBytes) {
  if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
    Variable *Lo = Arg64On32->getLo();
    Variable *Hi = Arg64On32->getHi();
    finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  Type Ty = Arg->getType();
  InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty);
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  // If the argument variable has been assigned a register, we need to load the
  // value from the stack slot.
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64);
    // This should be simple, just load the parameter off the stack using a
    // nice sp + imm addressing mode. Because ARM, we can't do that (e.g.,
    // VLDR, for fp types, cannot have an index register), so we legalize the
    // memory operand into a register.
    auto *Mem = OperandARM32Mem::create(
        Func, Ty, FramePtr, llvm::cast<ConstantInteger32>(
                                Ctx->getConstantInt32(Arg->getStackOffset())));
    _mov(Arg, legalizeToReg(Mem, Arg->getRegNum()));
    // This argument-copying instruction uses an explicit OperandARM32Mem
    // operand instead of a Variable, so its fill-from-stack operation has to
    // be tracked separately for statistics.
    Ctx->statsUpdateFills();
  }
}

Type TargetARM32::stackSlotType() { return IceType_i32; }

void TargetARM32::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. preserved registers |
  // +------------------------+
  // | 2. padding             |
  // +------------------------+ <--- FramePointer (if used)
  // | 3. global spill area   |
  // +------------------------+
  // | 4. padding             |
  // +------------------------+
  // | 5. local spill area    |
  // +------------------------+
  // | 6. padding             |
  // +------------------------+
  // | 7. allocas             |
  // +------------------------+ <--- StackPointer
  //
  // The following variables record the size in bytes of the given areas:
  //  * PreservedRegsSizeBytes: area 1
  //  * SpillAreaPaddingBytes:  area 2
  //  * GlobalsSize:            area 3
  //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
  //  * LocalsSpillAreaSize:    area 5
  //  * SpillAreaSizeBytes:     areas 2 - 6
  //
  // Determine stack frame offsets for each Variable without a register
  // assignment. This can be done as one variable per stack slot. Or, do
  // coalescing by running the register allocator again with an infinite set of
  // registers (as a side effect, this gives variables a second chance at
  // physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one block of
  // space on the frame for globals (variables with multi-block lifetime), and
  // one block to share for locals (single-block lifetime).
  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
  VarList SortedSpilledVariables;
  size_t GlobalsSize = 0;
  // If there is a separate locals area, this represents that area. Otherwise
  // it counts any variable not counted by GlobalsSize.
  SpillAreaSizeBytes = 0;
  // If there is a separate locals area, this specifies the alignment for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to largest natural alignment
  // of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  // For now, we don't have target-specific variables that need special
  // treatment (no stack-slot-linked SpillVariable type).
  std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) {
    static constexpr bool AssignStackSlot = false;
    static constexpr bool DontAssignStackSlot = !AssignStackSlot;
    if (llvm::isa<Variable64On32>(Var)) {
      return DontAssignStackSlot;
    }
    return AssignStackSlot;
  };

  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
                        &LocalsSlotsAlignmentBytes, TargetVarHook);
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
  SpillAreaSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers. On ARM, "push" can push a
  // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has
  // callee-saved float/vector registers. The "vpush" instruction can handle a
  // whole list of float/vector registers, but it only handles contiguous
  // sequences of registers by specifying the start and the length.
  VarList GPRsToPreserve;
  GPRsToPreserve.reserve(CalleeSaves.size());
  uint32_t NumCallee = 0;
  size_t PreservedRegsSizeBytes = 0;
  // Consider FP and LR as callee-save / used as needed.
  if (UsesFramePointer) {
    CalleeSaves[RegARM32::Reg_fp] = true;
    assert(RegsUsed[RegARM32::Reg_fp] == false);
    RegsUsed[RegARM32::Reg_fp] = true;
  }
  if (!MaybeLeafFunc) {
    CalleeSaves[RegARM32::Reg_lr] = true;
    RegsUsed[RegARM32::Reg_lr] = true;
  }
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (RegARM32::isI64RegisterPair(i)) {
      // We don't save register pairs explicitly. Instead, we rely on the code
      // fake-defing/fake-using each register in the pair.
      continue;
    }
    if (CalleeSaves[i] && RegsUsed[i]) {
      // TODO(jvoung): do separate vpush for each floating point register
      // segment and += 4, or 8 depending on type.
      ++NumCallee;
      Variable *PhysicalRegister = getPhysicalRegister(i);
      PreservedRegsSizeBytes +=
          typeWidthInBytesOnStack(PhysicalRegister->getType());
      GPRsToPreserve.push_back(getPhysicalRegister(i));
    }
  }
  Ctx->statsUpdateRegistersSaved(NumCallee);
  if (!GPRsToPreserve.empty())
    _push(GPRsToPreserve);

  // Generate "mov FP, SP" if needed.
  if (UsesFramePointer) {
    Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _mov(FP, SP);
    // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
    Context.insert(InstFakeUse::create(Func, FP));
  }

  // Align the variables area. SpillAreaPaddingBytes is the size of the region
  // after the preserved registers and before the spill areas.
  // LocalsSlotsPaddingBytes is the amount of padding between the globals and
  // locals area if they are separate.
  assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
  uint32_t SpillAreaPaddingBytes = 0;
  uint32_t LocalsSlotsPaddingBytes = 0;
  alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
                       GlobalsSize, LocalsSlotsAlignmentBytes,
                       &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;

  // Align SP if necessary.
  if (NeedsStackAlignment) {
    uint32_t StackOffset = PreservedRegsSizeBytes;
    uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
    SpillAreaSizeBytes = StackSize - StackOffset;
  }

  // Generate "sub sp, SpillAreaSizeBytes"
  if (SpillAreaSizeBytes) {
    // Use the scratch register if needed to legalize the immediate.
    Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                                  Legal_Reg | Legal_Flex, getReservedTmpReg());
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _sub(SP, SP, SubAmount);
  }
  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

  resetStackAdjustment();

  // Fill in stack offsets for stack args, and copy args into registers for
  // those that were register-allocated. Args are pushed right to left, so
  // Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset = PreservedRegsSizeBytes;
  if (!UsesFramePointer)
    BasicFrameOffset += SpillAreaSizeBytes;

  const VarList &Args = Func->getArgs();
  size_t InArgsSizeBytes = 0;
  TargetARM32::CallingConv CC;
  for (Variable *Arg : Args) {
    Type Ty = Arg->getType();
    bool InRegs = false;
    // Skip arguments passed in registers.
    if (isVectorType(Ty) || isFloatingType(Ty)) {
      int32_t DummyReg;
      InRegs = CC.FPInReg(Ty, &DummyReg);
    } else if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> DummyRegs;
      InRegs = CC.I64InRegs(&DummyRegs);
    } else {
      assert(Ty == IceType_i32);
      int32_t DummyReg;
      InRegs = CC.I32InReg(&DummyReg);
    }
    if (!InRegs)
      finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
  }

  // Fill in stack offsets for locals.
  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
                      UsesFramePointer);
  this->HasComputedFrame = true;

  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
    OstreamLocker L(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t SPAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";

    Str << "Stack details:\n"
        << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is FP based = " << UsesFramePointer << "\n";
  }
}

void TargetARM32::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<InstARM32Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding (forward)
  // iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  if (UsesFramePointer) {
    Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
    // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
    // use of SP before the assignment of SP=FP keeps previous SP adjustments
    // from being dead-code eliminated.
    Context.insert(InstFakeUse::create(Func, SP));
    _mov(SP, FP);
  } else {
    // add SP, SpillAreaSizeBytes
    if (SpillAreaSizeBytes) {
      // Use the scratch register if needed to legalize the immediate.
      Operand *AddAmount =
          legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                   Legal_Reg | Legal_Flex, getReservedTmpReg());
      _add(SP, SP, AddAmount);
    }
  }

  // Add pop instructions for preserved registers.
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  VarList GPRsToRestore;
  GPRsToRestore.reserve(CalleeSaves.size());
  // Consider FP and LR as callee-save / used as needed.
  if (UsesFramePointer) {
    CalleeSaves[RegARM32::Reg_fp] = true;
  }
  if (!MaybeLeafFunc) {
    CalleeSaves[RegARM32::Reg_lr] = true;
  }
  // Pop registers in ascending order just like push (instead of in reverse
  // order).
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (RegARM32::isI64RegisterPair(i)) {
      continue;
    }
    if (CalleeSaves[i] && RegsUsed[i]) {
      GPRsToRestore.push_back(getPhysicalRegister(i));
    }
  }
  if (!GPRsToRestore.empty())
    _pop(GPRsToRestore);

  if (!Ctx->getFlags().getUseSandboxing())
    return;

  // Change the original ret instruction into a sandboxed return sequence.
  // bundle_lock
  // bic lr, #0xc000000f
  // bx lr
  // bundle_unlock
  // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to
  // restrict to the lower 1GB as well.
  Operand *RetMask =
      legalize(Ctx->getConstantInt32(0xc000000f), Legal_Reg | Legal_Flex);
  Variable *LR = makeReg(IceType_i32, RegARM32::Reg_lr);
  Variable *RetValue = nullptr;
  if (RI->getSrcSize())
    RetValue = llvm::cast<Variable>(RI->getSrc(0));
  _bic(LR, LR, RetMask);
  _ret(LR, RetValue);
  RI->setDeleted();
}
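
// The 0xc000000f mask works as follows: clearing the low 4 bits forces the
// return target to a 16-byte bundle boundary, and clearing the top 2 bits
// keeps the target within the lower 1GB of the address space, as required by
// the NaCl ARM sandbox.
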
bool TargetARM32::isLegalVariableStackOffset(Type Ty, int32_t Offset) const {
  constexpr bool SignExt = false;
  return OperandARM32Mem::canHoldOffset(Ty, SignExt, Offset);
}

StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var,
                                                 int32_t StackAdjust,
                                                 Variable *OrigBaseReg) {
  int32_t Offset = Var->getStackOffset() + StackAdjust;
  // Legalize will likely need a movw/movt combination, but if the top bits are
  // all 0 from negating the offset and subtracting, we could use that instead.
  bool ShouldSub = (-Offset & 0xFFFF0000) == 0;
  if (ShouldSub)
    Offset = -Offset;
  Operand *OffsetVal = legalize(Ctx->getConstantInt32(Offset),
                                Legal_Reg | Legal_Flex, getReservedTmpReg());
  Variable *ScratchReg = makeReg(IceType_i32, getReservedTmpReg());
  if (ShouldSub)
    _sub(ScratchReg, OrigBaseReg, OffsetVal);
  else
    _add(ScratchReg, OrigBaseReg, OffsetVal);
  StackVariable *NewVar = Func->makeVariable<StackVariable>(stackSlotType());
  NewVar->setMustNotHaveReg();
  NewVar->setBaseRegNum(ScratchReg->getRegNum());
  constexpr int32_t NewOffset = 0;
  NewVar->setStackOffset(NewOffset);
  return NewVar;
}
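
// For example (illustrative), a slot at sp+70000 cannot be encoded as a
// direct load/store offset, so the code above materializes the offset into
// the reserved temp register (via movw/movt if needed), computes
// "add TMP, sp, TMP", and rewrites the access to use [TMP, #0] instead.
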
void TargetARM32::legalizeStackSlots() {
  // If a stack variable's frame offset doesn't fit, convert from:
  //   ldr X, OFF[SP]
  // to:
  //   movw/movt TMP, OFF_PART
  //   add TMP, TMP, SP
  //   ldr X, OFF_MORE[TMP]
  //
  // This is safe because we have reserved TMP, and add for ARM does not
  // clobber the flags register.
  Func->dump("Before legalizeStackSlots");
  assert(hasComputedFrame());
  // Early exit, if SpillAreaSizeBytes is really small.
  // TODO(jpp): this is not safe -- loads and stores of q registers can't have
  // offsets.
  if (isLegalVariableStackOffset(IceType_v4i32, SpillAreaSizeBytes))
    return;
  Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg());
  int32_t StackAdjust = 0;
  // Do a fairly naive greedy clustering for now. Pick the first stack slot
  // that's out of bounds and make a new base reg using the architecture's temp
  // register. If that works for the next slot, then great. Otherwise, create a
  // new base register, clobbering the previous base register. Never share a
  // base reg across different basic blocks. This isn't ideal if local and
  // multi-block variables are far apart and their references are interspersed.
  // It may help to be more coordinated about assigning stack slot numbers, and
  // it may help to assign smaller offsets to higher-weight variables so that
  // they don't depend on this legalization.
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    StackVariable *NewBaseReg = nullptr;
    int32_t NewBaseOffset = 0;
    while (!Context.atEnd()) {
      PostIncrLoweringContext PostIncrement(Context);
      Inst *CurInstr = Context.getCur();
      Variable *Dest = CurInstr->getDest();

      // Check if the previous NewBaseReg is clobbered, and reset if needed.
      if ((Dest && NewBaseReg && Dest->hasReg() &&
           Dest->getRegNum() == NewBaseReg->getBaseRegNum()) ||
          llvm::isa<InstFakeKill>(CurInstr)) {
        NewBaseReg = nullptr;
        NewBaseOffset = 0;
      }

      // The stack adjustment only matters if we are using SP instead of FP.
      if (!hasFramePointer()) {
        if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) {
          StackAdjust += AdjInst->getAmount();
          NewBaseOffset += AdjInst->getAmount();
          continue;
        }
        if (llvm::isa<InstARM32Call>(CurInstr)) {
          NewBaseOffset -= StackAdjust;
          StackAdjust = 0;
          continue;
        }
      }

      // For now, only Mov instructions can have stack variables. We need to
      // know the type of instruction because we currently create a fresh one
      // to replace Dest/Source, rather than mutate in place.
      bool MayNeedOffsetRewrite = false;
      if (auto *MovInstr = llvm::dyn_cast<InstARM32Mov>(CurInstr)) {
        MayNeedOffsetRewrite =
            !MovInstr->isMultiDest() && !MovInstr->isMultiSource();
      }

      if (!MayNeedOffsetRewrite) {
        continue;
      }

      assert(Dest != nullptr);
      Type DestTy = Dest->getType();
      assert(DestTy != IceType_i64);
      if (!Dest->hasReg()) {
        int32_t Offset = Dest->getStackOffset();
        Offset += StackAdjust;
        if (!isLegalVariableStackOffset(DestTy, Offset)) {
          if (NewBaseReg) {
            int32_t OffsetDiff = Offset - NewBaseOffset;
            if (isLegalVariableStackOffset(DestTy, OffsetDiff)) {
              StackVariable *NewDest =
                  Func->makeVariable<StackVariable>(stackSlotType());
              NewDest->setMustNotHaveReg();
              NewDest->setBaseRegNum(NewBaseReg->getBaseRegNum());
              NewDest->setStackOffset(OffsetDiff);
              Variable *NewDestVar = NewDest;
              _mov(NewDestVar, CurInstr->getSrc(0));
              CurInstr->setDeleted();
              continue;
            }
          }
          StackVariable *LegalDest =
              legalizeVariableSlot(Dest, StackAdjust, OrigBaseReg);
          assert(LegalDest != Dest);
          Variable *LegalDestVar = LegalDest;
          _mov(LegalDestVar, CurInstr->getSrc(0));
          CurInstr->setDeleted();
          NewBaseReg = LegalDest;
          NewBaseOffset = Offset;
          continue;
        }
      }

      assert(CurInstr->getSrcSize() == 1);
      Variable *Var = llvm::dyn_cast<Variable>(CurInstr->getSrc(0));
      if (Var && !Var->hasReg()) {
        Type VarTy = Var->getType();
        int32_t Offset = Var->getStackOffset();
        Offset += StackAdjust;
        if (!isLegalVariableStackOffset(VarTy, Offset)) {
          if (NewBaseReg) {
            int32_t OffsetDiff = Offset - NewBaseOffset;
            if (isLegalVariableStackOffset(VarTy, OffsetDiff)) {
              StackVariable *NewVar =
                  Func->makeVariable<StackVariable>(stackSlotType());
              NewVar->setMustNotHaveReg();
              NewVar->setBaseRegNum(NewBaseReg->getBaseRegNum());
              NewVar->setStackOffset(OffsetDiff);
              _mov(Dest, NewVar);
              CurInstr->setDeleted();
              continue;
            }
          }
          StackVariable *LegalVar =
              legalizeVariableSlot(Var, StackAdjust, OrigBaseReg);
          assert(LegalVar != Var);
          _mov(Dest, LegalVar);
          CurInstr->setDeleted();
          NewBaseReg = LegalVar;
          NewBaseOffset = Offset;
        }
      }
    }
  }
}

Operand *TargetARM32::loOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
    return Var64On32->getLo();
  if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand))
    return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
  if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
    // Conservatively disallow memory operands with side-effects (pre/post
    // increment) in case of duplication.
    assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
           Mem->getAddrMode() == OperandARM32Mem::NegOffset);
    if (Mem->isRegReg()) {
      return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
                                     Mem->getIndex(), Mem->getShiftOp(),
                                     Mem->getShiftAmt(), Mem->getAddrMode());
    }
    return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
                                   Mem->getOffset(), Mem->getAddrMode());
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}

Operand *TargetARM32::hiOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
    return Var64On32->getHi();
  if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    return Ctx->getConstantInt32(
        static_cast<uint32_t>(Const->getValue() >> 32));
  }
  if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
    // Conservatively disallow memory operands with side-effects in case of
    // duplication.
    assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
           Mem->getAddrMode() == OperandARM32Mem::NegOffset);
    const Type SplitType = IceType_i32;
    if (Mem->isRegReg()) {
      // We have to make a temp variable T, and add 4 to either Base or Index.
      // The Index may be shifted, so adding 4 can mean something else. Thus,
      // prefer T := Base + 4, and use T as the new Base.
      Variable *Base = Mem->getBase();
      Constant *Four = Ctx->getConstantInt32(4);
      Variable *NewBase = Func->makeVariable(Base->getType());
      lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
                                             NewBase, Base, Four));
      return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(),
                                     Mem->getShiftOp(), Mem->getShiftAmt(),
                                     Mem->getAddrMode());
    }
    Variable *Base = Mem->getBase();
    ConstantInteger32 *Offset = Mem->getOffset();
    assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
    int32_t NextOffsetVal = Offset->getValue() + 4;
    const bool SignExt = false;
    if (!OperandARM32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) {
      // We have to make a temp variable and add 4 to either Base or Offset.
      // If we add 4 to Offset, this will convert a non-RegReg addressing
      // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
      // RegReg addressing modes, prefer adding to base and replacing
      // instead. Thus we leave the old offset alone.
      Constant *Four = Ctx->getConstantInt32(4);
      Variable *NewBase = Func->makeVariable(Base->getType());
      lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
                                             NewBase, Base, Four));
      Base = NewBase;
    } else {
      Offset =
          llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
    }
    return OperandARM32Mem::create(Func, SplitType, Base, Offset,
                                   Mem->getAddrMode());
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}
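
// For example (illustrative), splitting the i64 constant 0x11223344AABBCCDD
// yields loOperand == 0xAABBCCDD and hiOperand == 0x11223344, and splitting
// the memory operand [r2, #8] yields [r2, #8] and [r2, #12].
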
llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
                                                 RegSetMask Exclude) const {
  llvm::SmallBitVector Registers(RegARM32::Reg_NUM);

#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,   \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
  if (scratch && (Include & RegSet_CallerSave))                                \
    Registers[RegARM32::val] = true;                                           \
  if (preserved && (Include & RegSet_CalleeSave))                              \
    Registers[RegARM32::val] = true;                                           \
  if (stackptr && (Include & RegSet_StackPointer))                             \
    Registers[RegARM32::val] = true;                                           \
  if (frameptr && (Include & RegSet_FramePointer))                             \
    Registers[RegARM32::val] = true;                                           \
  if (scratch && (Exclude & RegSet_CallerSave))                                \
    Registers[RegARM32::val] = false;                                          \
  if (preserved && (Exclude & RegSet_CalleeSave))                              \
    Registers[RegARM32::val] = false;                                          \
  if (stackptr && (Exclude & RegSet_StackPointer))                             \
    Registers[RegARM32::val] = false;                                          \
  if (frameptr && (Exclude & RegSet_FramePointer))                             \
    Registers[RegARM32::val] = false;

  REGARM32_TABLE

#undef X

  return Registers;
}

void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
  UsesFramePointer = true;
  // Conservatively require the stack to be aligned. Some stack adjustment
  // operations implemented below assume that the stack is aligned before the
  // alloca. All the alloca code ensures that the stack alignment is preserved
  // after the alloca. The stack alignment restriction can be relaxed in some
  // cases.
  NeedsStackAlignment = true;

  // TODO(stichnot): minimize the number of adjustments of SP, etc.
  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  Variable *Dest = Inst->getDest();
  uint32_t AlignmentParam = Inst->getAlignInBytes();
  // For default align=0, set it to the real value 1, to avoid any
  // bit-manipulation problems below.
  AlignmentParam = std::max(AlignmentParam, 1u);

  // LLVM enforces power of 2 alignment.
  assert(llvm::isPowerOf2_32(AlignmentParam));
  assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));

  uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
  if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) {
    alignRegisterPow2(SP, Alignment);
  }
  Operand *TotalSize = Inst->getSizeInBytes();
  if (const auto *ConstantTotalSize =
          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
    uint32_t Value = ConstantTotalSize->getValue();
    Value = Utils::applyAlignment(Value, Alignment);
    Operand *SubAmount = legalize(Ctx->getConstantInt32(Value));
    _sub(SP, SP, SubAmount);
  } else {
    // Non-constant sizes need to be adjusted to the next highest multiple of
    // the required alignment at runtime.
    TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex);
    Variable *T = makeReg(IceType_i32);
    _mov(T, TotalSize);
    Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
    _add(T, T, AddAmount);
    alignRegisterPow2(T, Alignment);
    _sub(SP, SP, T);
  }
  _mov(Dest, SP);
}

void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
  if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
    return;
  Variable *SrcLoReg = legalizeToReg(SrcLo);
  switch (Ty) {
  default:
    llvm_unreachable("Unexpected type");
  case IceType_i8: {
    Operand *Mask =
        legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex);
    _tst(SrcLoReg, Mask);
    break;
  }
  case IceType_i16: {
    Operand *Mask =
        legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex);
    _tst(SrcLoReg, Mask);
    break;
  }
  case IceType_i32: {
    _tst(SrcLoReg, SrcLoReg);
    break;
  }
  case IceType_i64: {
    Variable *ScratchReg = makeReg(IceType_i32);
    _orrs(ScratchReg, SrcLoReg, SrcHi);
    // ScratchReg isn't going to be used, but we need the side-effect of
    // setting flags from this operation.
    Context.insert(InstFakeUse::create(Func, ScratchReg));
    break;
  }
  }
  InstARM32Label *Label = InstARM32Label::create(Func, this);
  _br(Label, CondARM32::NE);
  _trap();
  Context.insert(Label);
}
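
// For an i32 divisor in r1, the emitted check is effectively (illustrative):
//   tst r1, r1   ; sets Z if the divisor is zero
//   bne skip     ; nonzero divisor: skip the trap
//   <trap>       ; divide-by-zero trap
// skip:
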
void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
                               Operand *Src1, ExtInstr ExtFunc,
                               DivInstr DivFunc, const char *DivHelperName,
                               bool IsRemainder) {
  div0Check(Dest->getType(), Src1, nullptr);
  Variable *Src1R = legalizeToReg(Src1);
  Variable *T0R = Src0R;
  Variable *T1R = Src1R;
  if (Dest->getType() != IceType_i32) {
    T0R = makeReg(IceType_i32);
    (this->*ExtFunc)(T0R, Src0R, CondARM32::AL);
    T1R = makeReg(IceType_i32);
    (this->*ExtFunc)(T1R, Src1R, CondARM32::AL);
  }
  if (hasCPUFeature(TargetARM32Features::HWDivArm)) {
    (this->*DivFunc)(T, T0R, T1R, CondARM32::AL);
    if (IsRemainder) {
      Variable *T2 = makeReg(IceType_i32);
      _mls(T2, T, T1R, T0R);
      T = T2;
    }
    _mov(Dest, T);
  } else {
    constexpr SizeT MaxSrcs = 2;
    InstCall *Call = makeHelperCall(DivHelperName, Dest, MaxSrcs);
    Call->addArg(T0R);
    Call->addArg(T1R);
    lowerCall(Call);
  }
}

void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
  Variable *Dest = Inst->getDest();
  // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to
  // legalize Src0 to flex or Src1 to flex and there is a reversible
  // instruction. E.g., reverse subtract with immediate, register vs register,
  // immediate.
  // Or it may be the case that the operands aren't swapped, but the bits can
  // be flipped and a different operation applied. E.g., use BIC (bit clear)
  // instead of AND for some masks.
  Operand *Src0 = legalizeUndef(Inst->getSrc(0));
  Operand *Src1 = legalizeUndef(Inst->getSrc(1));
  if (Dest->getType() == IceType_i64) {
    // These helper-call-involved instructions are lowered in this separate
    // switch. This is because we would otherwise assume that we need to
    // legalize Src0 to Src0RLo and Src0RHi. However, those go unused with
    // helper calls, and such unused/redundant instructions will fail liveness
    // analysis under -Om1 setting.
    switch (Inst->getOp()) {
    default:
      break;
    case InstArithmetic::Udiv:
    case InstArithmetic::Sdiv:
    case InstArithmetic::Urem:
    case InstArithmetic::Srem: {
      // Check for divide by 0 (ARM normally doesn't trap, but we want it to
      // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
      // register, which will hide a constant source operand. Instead, check
      // the not-yet-legalized Src1 to optimize-out a divide by 0 check.
      if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
        if (C64->getValue() == 0) {
          _trap();
          return;
        }
      }
      Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
      Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
      div0Check(IceType_i64, Src1Lo, Src1Hi);

      // Technically, ARM has their own aeabi routines, but we can use the
      // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
      // the more standard __moddi3 for rem.
      const char *HelperName = "";
      switch (Inst->getOp()) {
      default:
        llvm_unreachable("Should have only matched div ops.");
        break;
      case InstArithmetic::Udiv:
        HelperName = H_udiv_i64;
        break;
      case InstArithmetic::Sdiv:
        HelperName = H_sdiv_i64;
        break;
      case InstArithmetic::Urem:
        HelperName = H_urem_i64;
        break;
      case InstArithmetic::Srem:
        HelperName = H_srem_i64;
        break;
      }
      constexpr SizeT MaxSrcs = 2;
      InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
      Call->addArg(Src0);
      Call->addArg(Src1);
      lowerCall(Call);
      return;
    }
    }
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Variable *Src0RLo = legalizeToReg(loOperand(Src0));
    Variable *Src0RHi = legalizeToReg(hiOperand(Src0));
    Operand *Src1Lo = loOperand(Src1);
    Operand *Src1Hi = hiOperand(Src1);
    Variable *T_Lo = makeReg(DestLo->getType());
    Variable *T_Hi = makeReg(DestHi->getType());
    switch (Inst->getOp()) {
    case InstArithmetic::_num:
      llvm_unreachable("Unknown arithmetic operator");
      break;
    case InstArithmetic::Add:
      Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
      Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
      _adds(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _adc(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::And:
      Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
      Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
      _and(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _and(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Or:
      Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
      Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
      _orr(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _orr(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Xor:
      Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
      Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
      _eor(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _eor(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Sub:
      Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
      Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
      _subs(T_Lo, Src0RLo, Src1Lo);
      _mov(DestLo, T_Lo);
      _sbc(T_Hi, Src0RHi, Src1Hi);
      _mov(DestHi, T_Hi);
      break;
    case InstArithmetic::Mul: {
      // GCC 4.8 does:
      //   t_acc =(mul) (b.lo * c.hi)
      //   t_acc =(mla) (c.lo * b.hi) + t_acc
      //   t.hi,t.lo =(umull) b.lo * c.lo
      //   t.hi =(add) t.hi + t_acc
      //
      // LLVM does:
      //   t.hi,t.lo =(umull) b.lo * c.lo
      //   t.hi =(mla) (b.lo * c.hi) + t.hi
      //   t.hi =(mla) (b.hi * c.lo) + t.hi
      //
      // LLVM's lowering has fewer instructions, but more register pressure:
      // t.lo is live from beginning to end, while GCC delays the two-dest
      // instruction till the end, and kills c.hi immediately.
      Variable *T_Acc = makeReg(IceType_i32);
      Variable *T_Acc1 = makeReg(IceType_i32);
      Variable *T_Hi1 = makeReg(IceType_i32);
      Variable *Src1RLo = legalizeToReg(Src1Lo);
      Variable *Src1RHi = legalizeToReg(Src1Hi);
      _mul(T_Acc, Src0RLo, Src1RHi);
      _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
      _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
      _add(T_Hi, T_Hi1, T_Acc1);
      _mov(DestLo, T_Lo);
      _mov(DestHi, T_Hi);
      break;
    }
    case InstArithmetic::Shl: {
      // a=b<<c ==>
      // pnacl-llc does:
      // rsb T0, t_c.lo, #32
      // lsr T1, t_b.lo, T0
      // orr t_a.hi, T1, t_b.hi, lsl t_c.lo
      // sub T2, t_c.lo, #32
      // cmp T2, #0
      // lslge t_a.hi, t_b.lo, T2
      // lsl t_a.lo, t_b.lo, t_c.lo
      //
      // GCC 4.8 does:
      // sub t_c1, c.lo, #32
      // lsl t_hi, b.hi, c.lo
      // orr t_hi, t_hi, b.lo, lsl t_c1
      // rsb t_c2, c.lo, #32
      // orr t_hi, t_hi, b.lo, lsr t_c2
      // lsl t_lo, b.lo, c.lo
      //
      // These are incompatible, therefore we mimic pnacl-llc.
      // Can be strength-reduced for constant-shifts, but we don't do that for
      // now.
      // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
      // ARM, shifts only take the lower 8 bits of the shift register, and
      // saturate to the range 0-32, so the negative value will saturate to 32.
      Constant *_32 = Ctx->getConstantInt32(32);
      Constant *_0 = Ctx->getConstantZero(IceType_i32);
      Variable *Src1RLo = legalizeToReg(Src1Lo);
      Variable *T0 = makeReg(IceType_i32);
      Variable *T1 = makeReg(IceType_i32);
      Variable *T2 = makeReg(IceType_i32);
      Variable *TA_Hi = makeReg(IceType_i32);
      Variable *TA_Lo = makeReg(IceType_i32);
      _rsb(T0, Src1RLo, _32);
      _lsr(T1, Src0RLo, T0);
      _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                                  OperandARM32::LSL, Src1RLo));
      _sub(T2, Src1RLo, _32);
      _cmp(T2, _0);
      _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
      _set_dest_redefined();
      _lsl(TA_Lo, Src0RLo, Src1RLo);
      _mov(DestLo, TA_Lo);
      _mov(DestHi, TA_Hi);
      break;
    }
    case InstArithmetic::Lshr:
    case InstArithmetic::Ashr: {
      // a=b>>c ==>
      // pnacl-llc does:
      // lsr T0, t_b.lo, t_c.lo
      // rsb T1, t_c.lo, #32
      // orr t_a.lo, T0, t_b.hi, lsl T1
      // sub T2, t_c.lo, #32
      // cmp T2, #0
      // [al]srge t_a.lo, t_b.hi, T2
      // [al]sr t_a.hi, t_b.hi, t_c.lo
      //
      // GCC 4.8 does (lsr):
      // rsb t_c1, c.lo, #32
      // lsr t_lo, b.lo, c.lo
      // orr t_lo, t_lo, b.hi, lsl t_c1
      // sub t_c2, c.lo, #32
      // orr t_lo, t_lo, b.hi, lsr t_c2
      // lsr t_hi, b.hi, c.lo
      //
      // These are incompatible, therefore we mimic pnacl-llc.
      const bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
      Constant *_32 = Ctx->getConstantInt32(32);
      Constant *_0 = Ctx->getConstantZero(IceType_i32);
      Variable *Src1RLo = legalizeToReg(Src1Lo);
      Variable *T0 = makeReg(IceType_i32);
      Variable *T1 = makeReg(IceType_i32);
      Variable *T2 = makeReg(IceType_i32);
      Variable *TA_Lo = makeReg(IceType_i32);
      Variable *TA_Hi = makeReg(IceType_i32);
      _lsr(T0, Src0RLo, Src1RLo);
      _rsb(T1, Src1RLo, _32);
      _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
                                                  OperandARM32::LSL, T1));
      _sub(T2, Src1RLo, _32);
      _cmp(T2, _0);
      if (IsAshr) {
        _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
        _set_dest_redefined();
        _asr(TA_Hi, Src0RHi, Src1RLo);
      } else {
        _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
        _set_dest_redefined();
        _lsr(TA_Hi, Src0RHi, Src1RLo);
      }
      _mov(DestLo, TA_Lo);
      _mov(DestHi, TA_Hi);
      break;
    }
    case InstArithmetic::Fadd:
    case InstArithmetic::Fsub:
    case InstArithmetic::Fmul:
    case InstArithmetic::Fdiv:
    case InstArithmetic::Frem:
      llvm_unreachable("FP instruction with i64 type");
      break;
    case InstArithmetic::Udiv:
    case InstArithmetic::Sdiv:
    case InstArithmetic::Urem:
    case InstArithmetic::Srem:
      llvm_unreachable("Call-helper-involved instruction for i64 type "
                       "should have already been handled before");
      break;
    }
    return;
  } else if (isVectorType(Dest->getType())) {
    // Add a fake def to keep liveness consistent in the meantime.
    Variable *T = makeReg(Dest->getType());
    Context.insert(InstFakeDef::create(Func, T));
    _mov(Dest, T);
    UnimplementedError(Func->getContext()->getFlags());
    return;
  }
  // Dest->getType() is a non-i64 scalar.
  Variable *Src0R = legalizeToReg(Src0);
  Variable *T = makeReg(Dest->getType());
  // Handle div/rem separately. They require a non-legalized Src1 to inspect
  // whether or not Src1 is a non-zero constant. Once legalized it is more
  // difficult to determine (constant may be moved to a register).
  switch (Inst->getOp()) {
  default:
    break;
  case InstArithmetic::Udiv: {
    constexpr bool NotRemainder = false;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
                 H_udiv_i32, NotRemainder);
    return;
  }
  case InstArithmetic::Sdiv: {
    constexpr bool NotRemainder = false;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
                 H_sdiv_i32, NotRemainder);
    return;
  }
  case InstArithmetic::Urem: {
    constexpr bool IsRemainder = true;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
                 H_urem_i32, IsRemainder);
    return;
  }
  case InstArithmetic::Srem: {
    constexpr bool IsRemainder = true;
    lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
                 H_srem_i32, IsRemainder);
    return;
  }
  case InstArithmetic::Frem: {
    const SizeT MaxSrcs = 2;
    Type Ty = Dest->getType();
    InstCall *Call = makeHelperCall(
        isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
    Call->addArg(Src0R);
    Call->addArg(Src1);
    lowerCall(Call);
    return;
  }
  }
  // Handle floating point arithmetic separately: they require Src1 to be
  // legalized to a register.
  switch (Inst->getOp()) {
  default:
    break;
  case InstArithmetic::Fadd: {
    Variable *Src1R = legalizeToReg(Src1);
    _vadd(T, Src0R, Src1R);
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Fsub: {
    Variable *Src1R = legalizeToReg(Src1);
    _vsub(T, Src0R, Src1R);
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Fmul: {
    Variable *Src1R = legalizeToReg(Src1);
    _vmul(T, Src0R, Src1R);
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Fdiv: {
    Variable *Src1R = legalizeToReg(Src1);
    _vdiv(T, Src0R, Src1R);
    _mov(Dest, T);
    return;
  }
  }

  Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
  switch (Inst->getOp()) {
  case InstArithmetic::_num:
    llvm_unreachable("Unknown arithmetic operator");
    return;
  case InstArithmetic::Add:
    _add(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::And:
    _and(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Or:
    _orr(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Xor:
    _eor(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Sub:
    _sub(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Mul: {
    Variable *Src1R = legalizeToReg(Src1RF);
    _mul(T, Src0R, Src1R);
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Shl:
    _lsl(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Lshr:
    if (Dest->getType() != IceType_i32) {
      _uxt(Src0R, Src0R);
    }
    _lsr(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Ashr:
    if (Dest->getType() != IceType_i32) {
      _sxt(Src0R, Src0R);
    }
    _asr(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  case InstArithmetic::Udiv:
  case InstArithmetic::Sdiv:
  case InstArithmetic::Urem:
  case InstArithmetic::Srem:
    llvm_unreachable("Integer div/rem should have been handled earlier.");
    return;
  case InstArithmetic::Fadd:
  case InstArithmetic::Fsub:
  case InstArithmetic::Fmul:
  case InstArithmetic::Fdiv:
  case InstArithmetic::Frem:
    llvm_unreachable("Floating point arith should have been handled earlier.");
    return;
  }
}

1764 void TargetARM32::lowerAssign(const InstAssign *Inst) {
1765 Variable *Dest = Inst->getDest();
1766 Operand *Src0 = Inst->getSrc(0);
1767 assert(Dest->getType() == Src0->getType());
1768 if (Dest->getType() == IceType_i64) {
1769 Src0 = legalizeUndef(Src0);
1770 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
1771 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
1772 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1773 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1774 Variable *T_Lo = makeReg(IceType_i32);
1775 Variable *T_Hi = makeReg(IceType_i32);
1783 if (Dest->hasReg()) {
1784 // If Dest already has a physical register, then legalize the Src operand
1785 // into a Variable with the same register assignment. This especially
1786 // helps allow the use of Flex operands.
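// For example (illustrative only), assigning a shifted value to a Dest that
// already has a register can fold into one flexible-operand instruction:
//   mov r0, r1, lsl #2   @ instead of a separate shift plus a plain mov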
1787 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
1789 // Dest could be a stack operand. Since we could potentially need to do a
1790 // Store (and store can only have Register operands), legalize this to a register.
1792 NewSrc = legalize(Src0, Legal_Reg);
1794 if (isVectorType(Dest->getType())) {
1795 Variable *SrcR = legalizeToReg(NewSrc);
1797 } else if (isFloatingType(Dest->getType())) {
1798 Variable *SrcR = legalizeToReg(NewSrc);
1806 void TargetARM32::lowerBr(const InstBr *Inst) {
1807 if (Inst->isUnconditional()) {
1808 _br(Inst->getTargetUnconditional());
1811 Operand *Cond = Inst->getCondition();
1812 // TODO(jvoung): Handle folding opportunities.
1814 Type Ty = Cond->getType();
1815 Variable *Src0R = legalizeToReg(Cond);
1816 assert(Ty == IceType_i1);
1817 if (Ty != IceType_i32)
1819 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1821 _br(Inst->getTargetTrue(), Inst->getTargetFalse(), CondARM32::NE);
1824 void TargetARM32::lowerCall(const InstCall *Instr) {
1825 MaybeLeafFunc = false;
1826 NeedsStackAlignment = true;
1828 // Assign arguments to registers and stack. Also reserve stack.
1829 TargetARM32::CallingConv CC;
1830 // Pair of Arg Operand -> GPR number assignments.
1831 llvm::SmallVector<std::pair<Operand *, int32_t>,
1832 TargetARM32::CallingConv::ARM32_MAX_GPR_ARG> GPRArgs;
1833 llvm::SmallVector<std::pair<Operand *, int32_t>,
1834 TargetARM32::CallingConv::ARM32_MAX_FP_REG_UNITS> FPArgs;
1835 // Pair of Arg Operand -> stack offset.
1836 llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
1837 int32_t ParameterAreaSizeBytes = 0;
1839 // Classify each argument operand according to the location where the
1840 // argument is passed.
1841 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
1842 Operand *Arg = legalizeUndef(Instr->getArg(i));
1843 Type Ty = Arg->getType();
1844 bool InRegs = false;
1845 if (Ty == IceType_i64) {
1846 std::pair<int32_t, int32_t> Regs;
1847 if (CC.I64InRegs(&Regs)) {
1849 Operand *Lo = loOperand(Arg);
1850 Operand *Hi = hiOperand(Arg);
1851 GPRArgs.push_back(std::make_pair(Lo, Regs.first));
1852 GPRArgs.push_back(std::make_pair(Hi, Regs.second));
1854 } else if (isVectorType(Ty) || isFloatingType(Ty)) {
1856 if (CC.FPInReg(Ty, &Reg)) {
1858 FPArgs.push_back(std::make_pair(Arg, Reg));
1861 assert(Ty == IceType_i32);
1863 if (CC.I32InReg(&Reg)) {
1865 GPRArgs.push_back(std::make_pair(Arg, Reg));
1870 ParameterAreaSizeBytes =
1871 applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
1872 StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
1873 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
1877 // Adjust the parameter area so that the stack is aligned. It is assumed that
1878 // the stack is already aligned at the start of the calling sequence.
1879 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
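// Worked example (assuming the usual 16-byte alignment): 20 bytes of argument
// data is padded up to a 32-byte parameter area, keeping SP aligned across
// the call.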
1881 // Subtract the appropriate amount for the argument area. This also takes
1882 // care of setting the stack adjustment during emission.
1884 // TODO: If for some reason the call instruction gets dead-code eliminated
1885 // after lowering, we would need to ensure that the pre-call and the
1886 // post-call sp adjustments get eliminated as well.
1887 if (ParameterAreaSizeBytes) {
1888 Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
1889 Legal_Reg | Legal_Flex);
1890 _adjust_stack(ParameterAreaSizeBytes, SubAmount);
1893 // Copy arguments that are passed on the stack to the appropriate stack locations.
1895 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1896 for (auto &StackArg : StackArgs) {
1897 ConstantInteger32 *Loc =
1898 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
1899 Type Ty = StackArg.first->getType();
1900 OperandARM32Mem *Addr;
1901 constexpr bool SignExt = false;
1902 if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
1903 Addr = OperandARM32Mem::create(Func, Ty, SP, Loc);
1905 Variable *NewBase = Func->makeVariable(SP->getType());
1907 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
1908 Addr = formMemoryOperand(NewBase, Ty);
1910 lowerStore(InstStore::create(Func, StackArg.first, Addr));
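// Note on the canHoldOffset() split above: ARM32 word ldr/str immediates
// span roughly +/-4095 bytes, while the halfword and signed-byte forms only
// span +/-255, so larger stack offsets fall back to materializing SP + Loc
// in a temporary base register.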
1913 // Generate the call instruction. Assign its result to a temporary with high
1914 // register allocation weight.
1915 Variable *Dest = Instr->getDest();
1916 // ReturnReg doubles as ReturnRegLo as necessary.
1917 Variable *ReturnReg = nullptr;
1918 Variable *ReturnRegHi = nullptr;
1920 switch (Dest->getType()) {
1922 llvm_unreachable("Invalid Call dest type");
1930 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0);
1933 ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0);
1934 ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
1937 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_s0);
1940 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_d0);
1949 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0);
1953 Operand *CallTarget = Instr->getCallTarget();
1954 // TODO(jvoung): Handle sandboxing. const bool NeedSandboxing =
1955 // Ctx->getFlags().getUseSandboxing();
1957 // Allow ConstantRelocatable to be left alone as a direct call, but force
1958 // other constants like ConstantInteger32 to be in a register, making it an indirect call.
1960 if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
1961 CallTarget = legalize(CallTarget, Legal_Reg);
1964 // Copy arguments to be passed in registers to the appropriate registers.
1965 for (auto &FPArg : FPArgs) {
1966 Variable *Reg = legalizeToReg(FPArg.first, FPArg.second);
1967 Context.insert(InstFakeUse::create(Func, Reg));
1969 for (auto &GPRArg : GPRArgs) {
1970 Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second);
1971 // Generate a FakeUse of register arguments so that they do not get dead
1972 // code eliminated as a result of the FakeKill of scratch registers after the call.
1974 Context.insert(InstFakeUse::create(Func, Reg));
1976 Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget);
1977 Context.insert(NewCall);
1979 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
1981 // Add the appropriate offset to SP. The call instruction takes care of
1982 // resetting the stack offset during emission.
1983 if (ParameterAreaSizeBytes) {
1984 Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
1985 Legal_Reg | Legal_Flex);
1986 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1987 _add(SP, SP, AddAmount);
1990 // Insert a register-kill pseudo instruction.
1991 Context.insert(InstFakeKill::create(Func, NewCall));
1993 // Generate a FakeUse to keep the call live if necessary.
1994 if (Instr->hasSideEffects() && ReturnReg) {
1995 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
1996 Context.insert(FakeUse);
2002 // Assign the result of the call to Dest.
2005 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
2006 Variable *DestLo = Dest64On32->getLo();
2007 Variable *DestHi = Dest64On32->getHi();
2008 _mov(DestLo, ReturnReg);
2009 _mov(DestHi, ReturnRegHi);
2011 if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
2012 _mov(Dest, ReturnReg);
2014 assert(isIntegerType(Dest->getType()) &&
2015 typeWidthInBytes(Dest->getType()) <= 4);
2016 _mov(Dest, ReturnReg);
2023 void configureBitcastTemporary(Variable64On32 *Var) {
2024 Var->setMustNotHaveReg();
2025 Var->getHi()->setMustHaveReg();
2026 Var->getLo()->setMustHaveReg();
2028 } // end of anonymous namespace
2030 void TargetARM32::lowerCast(const InstCast *Inst) {
2031 InstCast::OpKind CastKind = Inst->getCastKind();
2032 Variable *Dest = Inst->getDest();
2033 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
2036 Func->setError("Cast type not supported");
2038 case InstCast::Sext: {
2039 if (isVectorType(Dest->getType())) {
2040 Variable *T = makeReg(Dest->getType());
2041 Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));
2043 UnimplementedError(Func->getContext()->getFlags());
2044 } else if (Dest->getType() == IceType_i64) {
2045 // t1 = sxtb src; t2 = mov t1 asr #31; dst.lo = t1; dst.hi = t2
2046 Constant *ShiftAmt = Ctx->getConstantInt32(31);
2047 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2048 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2049 Variable *T_Lo = makeReg(DestLo->getType());
2050 if (Src0->getType() == IceType_i32) {
2051 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
2053 } else if (Src0->getType() == IceType_i1) {
2054 Variable *Src0R = legalizeToReg(Src0);
2055 _lsl(T_Lo, Src0R, ShiftAmt);
2056 _asr(T_Lo, T_Lo, ShiftAmt);
2058 Variable *Src0R = legalizeToReg(Src0);
2062 Variable *T_Hi = makeReg(DestHi->getType());
2063 if (Src0->getType() != IceType_i1) {
2064 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,
2065 OperandARM32::ASR, ShiftAmt));
2067 // For i1, the asr instruction is already done above.
2071 } else if (Src0->getType() == IceType_i1) {
2072 // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1.
2073 // lsl t1, src_reg, 31
2076 Variable *Src0R = legalizeToReg(Src0);
2077 Constant *ShiftAmt = Ctx->getConstantInt32(31);
2078 Variable *T = makeReg(Dest->getType());
2079 _lsl(T, Src0R, ShiftAmt);
2080 _asr(T, T, ShiftAmt);
2083 // t1 = sxt src; dst = t1
2084 Variable *Src0R = legalizeToReg(Src0);
2085 Variable *T = makeReg(Dest->getType());
2091 case InstCast::Zext: {
2092 if (isVectorType(Dest->getType())) {
2093 Variable *T = makeReg(Dest->getType());
2094 Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));
2096 UnimplementedError(Func->getContext()->getFlags());
2097 } else if (Dest->getType() == IceType_i64) {
2098 // t1=uxtb src; dst.lo=t1; dst.hi=0
2099 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2100 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2101 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2102 Variable *T_Lo = makeReg(DestLo->getType());
2103 // i32 and i1 can just take up the whole register. i32 doesn't need uxt,
2104 // while i1 will have an and mask later anyway.
2105 if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) {
2106 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
2109 Variable *Src0R = legalizeToReg(Src0);
2112 if (Src0->getType() == IceType_i1) {
2113 Constant *One = Ctx->getConstantInt32(1);
2114 _and(T_Lo, T_Lo, One);
2117 Variable *T_Hi = makeReg(DestLo->getType());
2120 } else if (Src0->getType() == IceType_i1) {
2121 // t = Src0; t &= 1; Dest = t
2122 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
2123 Constant *One = Ctx->getConstantInt32(1);
2124 Variable *T = makeReg(Dest->getType());
2125 // Just use _mov instead of _uxt since all registers are 32-bit. _uxt
2126 // requires the source to be a register, so it could have required a _mov
2127 // from legalize anyway.
2132 // t1 = uxt src; dst = t1
2133 Variable *Src0R = legalizeToReg(Src0);
2134 Variable *T = makeReg(Dest->getType());
2140 case InstCast::Trunc: {
2141 if (isVectorType(Dest->getType())) {
2142 Variable *T = makeReg(Dest->getType());
2143 Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));
2145 UnimplementedError(Func->getContext()->getFlags());
2147 if (Src0->getType() == IceType_i64)
2148 Src0 = loOperand(Src0);
2149 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
2150 // t1 = trunc Src0RF; Dest = t1
2151 Variable *T = makeReg(Dest->getType());
2153 if (Dest->getType() == IceType_i1)
2154 _and(T, T, Ctx->getConstantInt1(1));
2159 case InstCast::Fptrunc:
2160 case InstCast::Fpext: {
2161 // fptrunc: dest.f32 = fptrunc src0.fp64
2162 // fpext: dest.f64 = fpext src0.fp32
2163 const bool IsTrunc = CastKind == InstCast::Fptrunc;
2164 if (isVectorType(Dest->getType())) {
2165 Variable *T = makeReg(Dest->getType());
2166 Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));
2168 UnimplementedError(Func->getContext()->getFlags());
2171 assert(Dest->getType() == (IsTrunc ? IceType_f32 : IceType_f64));
2172 assert(Src0->getType() == (IsTrunc ? IceType_f64 : IceType_f32));
2173 Variable *Src0R = legalizeToReg(Src0);
2174 Variable *T = makeReg(Dest->getType());
2175 _vcvt(T, Src0R, IsTrunc ? InstARM32Vcvt::D2s : InstARM32Vcvt::S2d);
2179 case InstCast::Fptosi:
2180 case InstCast::Fptoui: {
2181 if (isVectorType(Dest->getType())) {
2182 Variable *T = makeReg(Dest->getType());
2183 Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));
2185 UnimplementedError(Func->getContext()->getFlags());
2189 const bool DestIsSigned = CastKind == InstCast::Fptosi;
2190 const bool Src0IsF32 = isFloat32Asserting32Or64(Src0->getType());
2191 if (llvm::isa<Variable64On32>(Dest)) {
2192 const char *HelperName =
2193 Src0IsF32 ? (DestIsSigned ? H_fptosi_f32_i64 : H_fptoui_f32_i64)
2194 : (DestIsSigned ? H_fptosi_f64_i64 : H_fptoui_f64_i64);
2195 static constexpr SizeT MaxSrcs = 1;
2196 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
2202 // t1.fp = vcvt src0.fp
2203 // t2.i32 = vmov t1.fp
2204 // dest.int = conv t2.i32 @ Truncates the result if needed.
2206 // t1.fp = vcvt src0.fp
2207 // t2.u32 = vmov t1.fp
2208 // dest.uint = conv t2.u32 @ Truncates the result if needed.
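// A concrete sketch of the above for "dest.i32 = fptosi src0.f64":
//   vcvt.s32.f64 s0, d0   @ convert to a signed i32 held in an S register
//   vmov         r0, s0   @ move the converted bits to a core register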
2209 Variable *Src0R = legalizeToReg(Src0);
2210 Variable *T_fp = makeReg(IceType_f32);
2211 const InstARM32Vcvt::VcvtVariant Conversion =
2212 Src0IsF32 ? (DestIsSigned ? InstARM32Vcvt::S2si : InstARM32Vcvt::S2ui)
2213 : (DestIsSigned ? InstARM32Vcvt::D2si : InstARM32Vcvt::D2ui);
2214 _vcvt(T_fp, Src0R, Conversion);
2215 Variable *T = makeReg(IceType_i32);
2217 if (Dest->getType() != IceType_i32) {
2218 Variable *T_1 = makeReg(Dest->getType());
2219 lowerCast(InstCast::create(Func, InstCast::Trunc, T_1, T));
2225 case InstCast::Sitofp:
2226 case InstCast::Uitofp: {
2227 if (isVectorType(Dest->getType())) {
2228 Variable *T = makeReg(Dest->getType());
2229 Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));
2231 UnimplementedError(Func->getContext()->getFlags());
2234 const bool SourceIsSigned = CastKind == InstCast::Sitofp;
2235 const bool DestIsF32 = isFloat32Asserting32Or64(Dest->getType());
2236 if (Src0->getType() == IceType_i64) {
2237 const char *HelperName =
2238 DestIsF32 ? (SourceIsSigned ? H_sitofp_i64_f32 : H_uitofp_i64_f32)
2239 : (SourceIsSigned ? H_sitofp_i64_f64 : H_uitofp_i64_f64);
2240 static constexpr SizeT MaxSrcs = 1;
2241 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
2247 // t1.i32 = sext src.int @ sign-extends src0 if needed.
2248 // t2.fp32 = vmov t1.i32
2249 // t3.fp = vcvt.{fp}.s32 @ fp is either f32 or f64
2251 // t1.i32 = zext src.int @ zero-extends src0 if needed.
2252 // t2.fp32 = vmov t1.i32
2253 // t3.fp = vcvt.{fp}.u32 @ fp is either f32 or f64
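// A concrete sketch of the above for "dest.f64 = sitofp src0.i32":
//   vmov         s0, r0   @ move the integer into an S register
//   vcvt.f64.s32 d0, s0   @ convert to double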
2254 if (Src0->getType() != IceType_i32) {
2255 Variable *Src0R_32 = makeReg(IceType_i32);
2256 lowerCast(InstCast::create(Func, SourceIsSigned ? InstCast::Sext
2261 Variable *Src0R = legalizeToReg(Src0);
2262 Variable *Src0R_f32 = makeReg(IceType_f32);
2263 _mov(Src0R_f32, Src0R);
2265 Variable *T = makeReg(Dest->getType());
2266 const InstARM32Vcvt::VcvtVariant Conversion =
2268 ? (SourceIsSigned ? InstARM32Vcvt::Si2s : InstARM32Vcvt::Ui2s)
2269 : (SourceIsSigned ? InstARM32Vcvt::Si2d : InstARM32Vcvt::Ui2d);
2270 _vcvt(T, Src0R, Conversion);
2274 case InstCast::Bitcast: {
2275 Operand *Src0 = Inst->getSrc(0);
2276 if (Dest->getType() == Src0->getType()) {
2277 InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
2278 lowerAssign(Assign);
2281 Type DestType = Dest->getType();
2285 llvm::report_fatal_error("Unexpected bitcast.");
2287 UnimplementedError(Func->getContext()->getFlags());
2290 UnimplementedError(Func->getContext()->getFlags());
2293 UnimplementedError(Func->getContext()->getFlags());
2297 Variable *Src0R = legalizeToReg(Src0);
2298 Variable *T = makeReg(DestType);
2300 lowerAssign(InstAssign::create(Func, Dest, T));
2306 // dest[63..32] = t1
2307 assert(Src0->getType() == IceType_f64);
2308 auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
2310 configureBitcastTemporary(T);
2311 Variable *Src0R = legalizeToReg(Src0);
2313 Context.insert(InstFakeUse::create(Func, T->getHi()));
2314 Context.insert(InstFakeUse::create(Func, T->getLo()));
2315 lowerAssign(InstAssign::create(Func, Dest, T));
2323 assert(Src0->getType() == IceType_i64);
2324 Variable *T = makeReg(DestType);
2325 auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
2326 Src64->initHiLo(Func);
2327 configureBitcastTemporary(Src64);
2328 lowerAssign(InstAssign::create(Func, Src64, Src0));
2330 lowerAssign(InstAssign::create(Func, Dest, T));
2339 case IceType_v4i32: {
2340 // Insert a fake def to avoid liveness errors.
2341 Variable *T = makeReg(DestType);
2342 Context.insert(InstFakeDef::create(Func, T, legalizeToReg(Src0)));
2344 UnimplementedError(Func->getContext()->getFlags());
2353 void TargetARM32::lowerExtractElement(const InstExtractElement *Inst) {
2354 Variable *Dest = Inst->getDest();
2355 Type DestType = Dest->getType();
2356 Variable *T = makeReg(DestType);
2357 Context.insert(InstFakeDef::create(Func, T));
2359 UnimplementedError(Func->getContext()->getFlags());
2363 // Validates FCMPARM32_TABLE's declaration w.r.t. InstFcmp::FCondition ordering
2366 #define X(val, CC0, CC1) _fcmp_ll_##val,
2373 #define X(tag, str) _fcmp_hl_##tag = InstFcmp::tag,
2379 static_assert(_fcmp_hl_NUM == _fcmp_ll_NUM,
2380 "Inconsistency between high-level and low-level fcmp tags.");
2381 #define X(tag, str) \
2383 _fcmp_hl_##tag == _fcmp_ll_##tag, \
2384 "Inconsistency between high-level and low-level fcmp tag " #tag);
2389 CondARM32::Cond CC0;
2390 CondARM32::Cond CC1;
2392 #define X(val, CC0, CC1) \
2393 { CondARM32::CC0, CondARM32::CC1 } \
2398 } // end of anonymous namespace
2400 void TargetARM32::lowerFcmp(const InstFcmp *Inst) {
2401 Variable *Dest = Inst->getDest();
2402 if (isVectorType(Dest->getType())) {
2403 Variable *T = makeReg(Dest->getType());
2404 Context.insert(InstFakeDef::create(Func, T));
2406 UnimplementedError(Func->getContext()->getFlags());
2410 Variable *Src0R = legalizeToReg(Inst->getSrc(0));
2411 Variable *Src1R = legalizeToReg(Inst->getSrc(1));
2412 Variable *T = makeReg(IceType_i32);
2413 _vcmp(Src0R, Src1R);
2414 _mov(T, Ctx->getConstantZero(IceType_i32));
2416 Operand *One = Ctx->getConstantInt32(1);
2417 InstFcmp::FCond Condition = Inst->getCondition();
2418 assert(Condition < llvm::array_lengthof(TableFcmp));
2419 CondARM32::Cond CC0 = TableFcmp[Condition].CC0;
2420 CondARM32::Cond CC1 = TableFcmp[Condition].CC1;
2421 if (CC0 != CondARM32::kNone) {
2423 // If this mov is not a maybe mov, but an actual mov (i.e., CC0 == AL), we
2424 // don't want to _set_dest_redefined so that liveness + dead-code
2425 // elimination will get rid of the previous assignment (i.e., T = 0) above.
2426 // TODO(stichnot,jpp): We should be able to conditionally create the "T=0"
2427 // instruction based on CC0, instead of relying on DCE to remove it.
2428 if (CC0 != CondARM32::AL)
2429 _set_dest_redefined();
2431 if (CC1 != CondARM32::kNone) {
2432 assert(CC0 != CondARM32::kNone);
2433 assert(CC1 != CondARM32::AL);
2434 _mov_redefined(T, One, CC1);
2439 void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
2440 Variable *Dest = Inst->getDest();
2441 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
2442 Operand *Src1 = legalizeUndef(Inst->getSrc(1));
2444 if (isVectorType(Dest->getType())) {
2445 Variable *T = makeReg(Dest->getType());
2446 Context.insert(InstFakeDef::create(Func, T));
2448 UnimplementedError(Func->getContext()->getFlags());
2452 // a=icmp cond, b, c ==>
2454 // cmp b.hi, c.hi or cmp b.lo, c.lo
2455 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi
2456 // mov.<C1> t, #1 mov.<C1> t, #1
2457 // mov.<C2> t, #0 mov.<C2> t, #0
2458 // mov a, t mov a, t
2459 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
2460 // is used for signed compares. In some cases, b and c need to be swapped as well.
2465 // eor t1, b.hi, c.hi
2466 // eor t2, b.lo, c.lo
2471 // that's nice in that it's just as short but has fewer dependencies for
2472 // better ILP at the cost of more registers.
2474 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two
2475 // unconditional mov #0, two cmps, two conditional mov #1, and one
2476 // conditional reg mov. That has few dependencies for good ILP, but is a longer sequence.
2479 // So, we are going with the GCC version since it's usually better (except
2480 // perhaps for eq/ne). We could revisit special-casing eq/ne later.
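// For instance (illustrative), a signed "a = icmp slt i64 b, c" lowers to:
//   cmp   b.lo, c.lo
//   sbcs  t1, b.hi, c.hi
//   movlt t, #1
//   movge t, #0
//   mov   a, t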
2481 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2482 Constant *One = Ctx->getConstantInt32(1);
2483 if (Src0->getType() == IceType_i64) {
2484 InstIcmp::ICond Condition = Inst->getCondition();
2485 size_t Index = static_cast<size_t>(Condition);
2486 assert(Index < llvm::array_lengthof(TableIcmp64));
2487 Variable *Src0Lo, *Src0Hi;
2488 Operand *Src1LoRF, *Src1HiRF;
2489 if (TableIcmp64[Index].Swapped) {
2490 Src0Lo = legalizeToReg(loOperand(Src1));
2491 Src0Hi = legalizeToReg(hiOperand(Src1));
2492 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
2493 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
2495 Src0Lo = legalizeToReg(loOperand(Src0));
2496 Src0Hi = legalizeToReg(hiOperand(Src0));
2497 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
2498 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
2500 Variable *T = makeReg(IceType_i32);
2501 if (TableIcmp64[Index].IsSigned) {
2502 Variable *ScratchReg = makeReg(IceType_i32);
2503 _cmp(Src0Lo, Src1LoRF);
2504 _sbcs(ScratchReg, Src0Hi, Src1HiRF);
2505 // ScratchReg isn't going to be used, but we need the side-effect of
2506 // setting flags from this operation.
2507 Context.insert(InstFakeUse::create(Func, ScratchReg));
2509 _cmp(Src0Hi, Src1HiRF);
2510 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ);
2512 _mov(T, One, TableIcmp64[Index].C1);
2513 _mov_redefined(T, Zero, TableIcmp64[Index].C2);
2518 // a=icmp cond b, c ==>
2526 // where the unsigned/sign extension is not needed for 32-bit. They also have
2527 // special cases for EQ and NE. E.g., for NE:
2528 // <extend to tb, tc>
2536 // cmp tb, c, lsl #<N>
2540 // the left shift is by 0, 16, or 24, which allows the comparison to focus on
2541 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For
2542 // the unsigned case, for some reason it does something similar to GCC and does a uxtb
2543 // first. It's not clear to me why that special-casing is needed.
2545 // We'll go with the LLVM way for now, since it's shorter and has just as few dependencies.
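// Illustrative example of the shifted compare for "a = icmp slt i16 b, c":
//   lsl   tb, b, #16
//   mov   t, #0
//   cmp   tb, c, lsl #16
//   movlt t, #1
//   mov   a, t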
2547 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
2548 assert(ShiftAmt >= 0);
2549 Constant *ShiftConst = nullptr;
2550 Variable *Src0R = nullptr;
2551 Variable *T = makeReg(IceType_i32);
2553 ShiftConst = Ctx->getConstantInt32(ShiftAmt);
2554 Src0R = makeReg(IceType_i32);
2555 _lsl(Src0R, legalizeToReg(Src0), ShiftConst);
2557 Src0R = legalizeToReg(Src0);
2561 Variable *Src1R = legalizeToReg(Src1);
2562 OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create(
2563 Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst);
2564 _cmp(Src0R, Src1RShifted);
2566 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
2567 _cmp(Src0R, Src1RF);
2569 _mov_redefined(T, One, getIcmp32Mapping(Inst->getCondition()));
2574 void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) {
2576 UnimplementedError(Func->getContext()->getFlags());
2580 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
2581 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
2582 return Integer->getValue();
2583 return Intrinsics::MemoryOrderInvalid;
2585 } // end of anonymous namespace
2587 void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
2588 Operand *Ptr, Operand *Val) {
2590 // ldrex contents, [addr]
2591 // op tmp, contents, operand
2592 // strex success, tmp, [addr]
2593 // cmp success, #0; bne retry
2594 // fake-use(addr, operand) @ prevents undesirable clobbering.
2595 // mov dest, contents
2596 assert(Dest != nullptr);
2597 Type DestTy = Dest->getType();
2601 OperandARM32Mem *Mem;
2602 Variable *PtrContentsReg;
2603 Variable *PtrContentsHiReg;
2604 Variable *PtrContentsLoReg;
2605 Variable *Value = Func->makeVariable(DestTy);
2607 Variable *ValueHiReg;
2608 Variable *ValueLoReg;
2609 Variable *Success = makeReg(IceType_i32);
2613 Operand *_0 = Ctx->getConstantZero(IceType_i32);
2614 InstARM32Label *Retry = InstARM32Label::create(Func, this);
2616 if (DestTy == IceType_i64) {
2617 Variable64On32 *PtrContentsReg64 = makeI64RegPair();
2618 PtrContentsHiReg = PtrContentsReg64->getHi();
2619 PtrContentsLoReg = PtrContentsReg64->getLo();
2620 PtrContentsReg = PtrContentsReg64;
2622 llvm::cast<Variable64On32>(Value)->initHiLo(Func);
2623 Variable64On32 *ValueReg64 = makeI64RegPair();
2624 ValueHiReg = ValueReg64->getHi();
2625 ValueLoReg = ValueReg64->getLo();
2626 ValueReg = ValueReg64;
2628 Variable64On32 *TmpReg64 = makeI64RegPair();
2629 TmpHiReg = TmpReg64->getHi();
2630 TmpLoReg = TmpReg64->getLo();
2633 PtrContentsReg = makeReg(DestTy);
2634 PtrContentsHiReg = nullptr;
2635 PtrContentsLoReg = PtrContentsReg;
2637 ValueReg = makeReg(DestTy);
2638 ValueHiReg = nullptr;
2639 ValueLoReg = ValueReg;
2641 TmpReg = makeReg(DestTy);
2646 if (DestTy == IceType_i64) {
2647 Context.insert(InstFakeDef::create(Func, Value));
2649 lowerAssign(InstAssign::create(Func, Value, Val));
2651 Variable *PtrVar = Func->makeVariable(IceType_i32);
2652 lowerAssign(InstAssign::create(Func, PtrVar, Ptr));
2655 Context.insert(Retry);
2656 Mem = formMemoryOperand(PtrVar, DestTy);
2657 if (DestTy == IceType_i64) {
2658 Context.insert(InstFakeDef::create(Func, ValueReg, Value));
2660 lowerAssign(InstAssign::create(Func, ValueReg, Value));
2661 if (DestTy == IceType_i8 || DestTy == IceType_i16) {
2662 _uxt(ValueReg, ValueReg);
2664 _ldrex(PtrContentsReg, Mem);
2666 if (DestTy == IceType_i64) {
2667 Context.insert(InstFakeDef::create(Func, TmpReg, ValueReg));
2669 switch (Operation) {
2671 Func->setError("Unknown AtomicRMW operation");
2673 case Intrinsics::AtomicAdd:
2674 if (DestTy == IceType_i64) {
2675 _adds(TmpLoReg, PtrContentsLoReg, ValueLoReg);
2676 _adc(TmpHiReg, PtrContentsHiReg, ValueHiReg);
2678 _add(TmpLoReg, PtrContentsLoReg, ValueLoReg);
2681 case Intrinsics::AtomicSub:
2682 if (DestTy == IceType_i64) {
2683 _subs(TmpLoReg, PtrContentsLoReg, ValueLoReg);
2684 _sbc(TmpHiReg, PtrContentsHiReg, ValueHiReg);
2686 _sub(TmpLoReg, PtrContentsLoReg, ValueLoReg);
2689 case Intrinsics::AtomicOr:
2690 _orr(TmpLoReg, PtrContentsLoReg, ValueLoReg);
2691 if (DestTy == IceType_i64) {
2692 _orr(TmpHiReg, PtrContentsHiReg, ValueHiReg);
2695 case Intrinsics::AtomicAnd:
2696 _and(TmpLoReg, PtrContentsLoReg, ValueLoReg);
2697 if (DestTy == IceType_i64) {
2698 _and(TmpHiReg, PtrContentsHiReg, ValueHiReg);
2701 case Intrinsics::AtomicXor:
2702 _eor(TmpLoReg, PtrContentsLoReg, ValueLoReg);
2703 if (DestTy == IceType_i64) {
2704 _eor(TmpHiReg, PtrContentsHiReg, ValueHiReg);
2707 case Intrinsics::AtomicExchange:
2708 _mov(TmpLoReg, ValueLoReg);
2709 if (DestTy == IceType_i64) {
2710 _mov(TmpHiReg, ValueHiReg);
2714 _strex(Success, TmpReg, Mem);
2716 _br(Retry, CondARM32::NE);
2718 // The following fake-uses ensure that Subzero will not clobber them in the
2719 // load-linked/store-conditional loop above. We might have to spill them, but
2720 // spilling is preferable to incorrect behavior.
2721 Context.insert(InstFakeUse::create(Func, PtrVar));
2722 if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) {
2723 Context.insert(InstFakeUse::create(Func, Value64->getHi()));
2724 Context.insert(InstFakeUse::create(Func, Value64->getLo()));
2726 Context.insert(InstFakeUse::create(Func, Value));
2729 if (DestTy == IceType_i8 || DestTy == IceType_i16) {
2730 _uxt(PtrContentsReg, PtrContentsReg);
2733 if (DestTy == IceType_i64) {
2734 Context.insert(InstFakeUse::create(Func, PtrContentsReg));
2736 lowerAssign(InstAssign::create(Func, Dest, PtrContentsReg));
2737 if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
2738 Context.insert(InstFakeUse::create(Func, Dest64->getLo()));
2739 Context.insert(InstFakeUse::create(Func, Dest64->getHi()));
2741 Context.insert(InstFakeUse::create(Func, Dest));
2745 void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
2746 Variable *Dest = Instr->getDest();
2747 Type DestTy = (Dest != nullptr) ? Dest->getType() : IceType_void;
2748 Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID;
2750 case Intrinsics::AtomicFence:
2751 case Intrinsics::AtomicFenceAll:
2752 assert(Dest == nullptr);
2755 case Intrinsics::AtomicIsLockFree: {
2756 Operand *ByteSize = Instr->getArg(0);
2757 auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize);
2758 if (CI == nullptr) {
2759 // The PNaCl ABI requires the byte size to be a compile-time constant.
2760 Func->setError("AtomicIsLockFree byte size should be compile-time const");
2763 static constexpr int32_t NotLockFree = 0;
2764 static constexpr int32_t LockFree = 1;
2765 int32_t Result = NotLockFree;
2766 switch (CI->getValue()) {
2774 _mov(Dest, legalizeToReg(Ctx->getConstantInt32(Result)));
2777 case Intrinsics::AtomicLoad: {
2778 assert(isScalarIntegerType(DestTy));
2779 // We require the memory address to be naturally aligned. Given that,
2780 // normal loads are atomic.
2781 if (!Intrinsics::isMemoryOrderValid(
2782 ID, getConstantMemoryOrder(Instr->getArg(1)))) {
2783 Func->setError("Unexpected memory ordering for AtomicLoad");
2788 if (DestTy == IceType_i64) {
2789 // ldrexd is the only ARM instruction that is guaranteed to load a 64-bit
2790 // integer atomically. Everything else works with a regular ldr.
2791 T = makeI64RegPair();
2792 _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64));
2794 T = makeReg(DestTy);
2795 _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy));
2798 lowerAssign(InstAssign::create(Func, Dest, T));
2799 // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
2800 // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert
2801 // the FakeUse on the last-inserted instruction's dest.
2803 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
2806 case Intrinsics::AtomicStore: {
2807 // We require the memory address to be naturally aligned. Given that,
2808 // normal stores are atomic.
2809 if (!Intrinsics::isMemoryOrderValid(
2810 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
2811 Func->setError("Unexpected memory ordering for AtomicStore");
2814 Operand *Value = Instr->getArg(0);
2815 Type ValueTy = Value->getType();
2816 assert(isScalarIntegerType(ValueTy));
2817 Operand *Addr = Instr->getArg(1);
2819 if (ValueTy == IceType_i64) {
2820 // Atomic 64-bit stores require a load-locked/store-conditional loop using
2821 // ldrexd and strexd. The lowered code is:
2824 // ldrexd t.lo, t.hi, [addr]
2825 // strexd success, value.lo, value.hi, [addr]
2828 // fake-use(addr, value.lo, value.hi)
2830 // The fake-use is needed to prevent those variables from being clobbered
2831 // in the loop (which will happen under register pressure.)
2832 Variable64On32 *Tmp = makeI64RegPair();
2833 Variable64On32 *ValueVar =
2834 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
2835 Variable *AddrVar = makeReg(IceType_i32);
2836 Variable *Success = makeReg(IceType_i32);
2837 OperandARM32Mem *Mem;
2838 Operand *_0 = Ctx->getConstantZero(IceType_i32);
2839 InstARM32Label *Retry = InstARM32Label::create(Func, this);
2840 Variable64On32 *NewReg = makeI64RegPair();
2841 ValueVar->initHiLo(Func);
2842 ValueVar->mustNotHaveReg();
2845 lowerAssign(InstAssign::create(Func, ValueVar, Value));
2846 lowerAssign(InstAssign::create(Func, AddrVar, Addr));
2848 Context.insert(Retry);
2849 Context.insert(InstFakeDef::create(Func, NewReg));
2850 lowerAssign(InstAssign::create(Func, NewReg, ValueVar));
2851 Mem = formMemoryOperand(AddrVar, IceType_i64);
2853 // This fake-use prevents the ldrex from being dead-code eliminated,
2854 // while also keeping liveness happy about all defs being used.
2856 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
2857 _strex(Success, NewReg, Mem);
2859 _br(Retry, CondARM32::NE);
2861 Context.insert(InstFakeUse::create(Func, ValueVar->getLo()));
2862 Context.insert(InstFakeUse::create(Func, ValueVar->getHi()));
2863 Context.insert(InstFakeUse::create(Func, AddrVar));
2867 // Non-64-bit stores are atomic as long as the address is aligned. This
2868 // is PNaCl, so addresses are aligned.
2869 Variable *T = makeReg(ValueTy);
2872 lowerAssign(InstAssign::create(Func, T, Value));
2873 _str(T, formMemoryOperand(Addr, ValueTy));
2877 case Intrinsics::AtomicCmpxchg: {
2878 // The initial lowering for cmpxchg was:
2881 // ldrex tmp, [addr]
2882 // cmp tmp, expected
2883 // mov expected, tmp
2885 // strex success, new, [addr]
2888 // mov dest, expected
2890 // Besides requiring two branches, that lowering could also potentially
2891 // write to memory (in mov expected, tmp) unless we were OK with increasing
2892 // the register pressure and requiring expected to be an infinite-weight
2893 // variable (spoiler alert: that was a problem for i64 cmpxchg.) Through
2894 // careful rewriting, and thanks to predication, we now implement the following lowering:
2898 // ldrex tmp, [addr]
2899 // cmp tmp, expected
2900 // strexeq success, new, [addr]
2901 // movne expected, tmp
2902 // cmpeq success, #0
2904 // mov dest, expected
2906 // Predication lets us move the strex ahead of the mov expected, tmp, which
2907 // allows tmp to be a non-infinite weight temporary. We wanted to avoid
2908 // writing to memory between ldrex and strex because, even though most of
2909 // the time that would cause no issues, if any interleaving memory write
2910 // aliased [addr] then we would have undefined behavior. Undefined behavior isn't
2911 // cool, so we try to avoid it. See the "Synchronization and semaphores"
2912 // section of the "ARM Architecture Reference Manual."
2914 assert(isScalarIntegerType(DestTy));
2915 // We require the memory address to be naturally aligned. Given that,
2916 // normal loads are atomic.
2917 if (!Intrinsics::isMemoryOrderValid(
2918 ID, getConstantMemoryOrder(Instr->getArg(3)),
2919 getConstantMemoryOrder(Instr->getArg(4)))) {
2920 Func->setError("Unexpected memory ordering for AtomicCmpxchg");
2924 OperandARM32Mem *Mem;
2926 Variable *Expected, *ExpectedReg;
2927 Variable *New, *NewReg;
2928 Variable *Success = makeReg(IceType_i32);
2929 Operand *_0 = Ctx->getConstantZero(IceType_i32);
2930 InstARM32Label *Retry = InstARM32Label::create(Func, this);
2932 if (DestTy == IceType_i64) {
2933 Variable64On32 *TmpReg64 = makeI64RegPair();
2934 Variable64On32 *New64 =
2935 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
2936 Variable64On32 *NewReg64 = makeI64RegPair();
2937 Variable64On32 *Expected64 =
2938 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
2939 Variable64On32 *ExpectedReg64 = makeI64RegPair();
2941 New64->initHiLo(Func);
2942 New64->mustNotHaveReg();
2943 Expected64->initHiLo(Func);
2944 Expected64->mustNotHaveReg();
2949 Expected = Expected64;
2950 ExpectedReg = ExpectedReg64;
2952 TmpReg = makeReg(DestTy);
2953 New = Func->makeVariable(DestTy);
2954 NewReg = makeReg(DestTy);
2955 Expected = Func->makeVariable(DestTy);
2956 ExpectedReg = makeReg(DestTy);
2959 Mem = formMemoryOperand(Instr->getArg(0), DestTy);
2960 if (DestTy == IceType_i64) {
2961 Context.insert(InstFakeDef::create(Func, Expected));
2963 lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1)));
2964 if (DestTy == IceType_i64) {
2965 Context.insert(InstFakeDef::create(Func, New));
2967 lowerAssign(InstAssign::create(Func, New, Instr->getArg(2)));
2970 Context.insert(Retry);
2971 if (DestTy == IceType_i64) {
2972 Context.insert(InstFakeDef::create(Func, ExpectedReg, Expected));
2974 lowerAssign(InstAssign::create(Func, ExpectedReg, Expected));
2975 if (DestTy == IceType_i64) {
2976 Context.insert(InstFakeDef::create(Func, NewReg, New));
2978 lowerAssign(InstAssign::create(Func, NewReg, New));
2980 _ldrex(TmpReg, Mem);
2982 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
2983 if (DestTy == IceType_i64) {
2984 auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg);
2985 auto *ExpectedReg64 = llvm::cast<Variable64On32>(ExpectedReg);
2986 // lowerAssign above has added fake-defs for TmpReg and ExpectedReg. Let's
2987 // keep liveness happy, shall we?
2988 Context.insert(InstFakeUse::create(Func, TmpReg));
2989 Context.insert(InstFakeUse::create(Func, ExpectedReg));
2990 _cmp(TmpReg64->getHi(), ExpectedReg64->getHi());
2991 _cmp(TmpReg64->getLo(), ExpectedReg64->getLo(), CondARM32::EQ);
2993 _cmp(TmpReg, ExpectedReg);
2995 _strex(Success, NewReg, Mem, CondARM32::EQ);
2996 if (DestTy == IceType_i64) {
2997 auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg);
2998 auto *Expected64 = llvm::cast<Variable64On32>(Expected);
2999 _mov_redefined(Expected64->getHi(), TmpReg64->getHi(), CondARM32::NE);
3000 _mov_redefined(Expected64->getLo(), TmpReg64->getLo(), CondARM32::NE);
3001 auto *FakeDef = InstFakeDef::create(Func, Expected, TmpReg);
3002 Context.insert(FakeDef);
3003 FakeDef->setDestRedefined();
3005 _mov_redefined(Expected, TmpReg, CondARM32::NE);
3007 _cmp(Success, _0, CondARM32::EQ);
3008 _br(Retry, CondARM32::NE);
3010 lowerAssign(InstAssign::create(Func, Dest, Expected));
3011 Context.insert(InstFakeUse::create(Func, Expected));
3012 if (auto *New64 = llvm::dyn_cast<Variable64On32>(New)) {
3013 Context.insert(InstFakeUse::create(Func, New64->getLo()));
3014 Context.insert(InstFakeUse::create(Func, New64->getHi()));
3016 Context.insert(InstFakeUse::create(Func, New));
3020 case Intrinsics::AtomicRMW: {
3021 if (!Intrinsics::isMemoryOrderValid(
3022 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
3023 Func->setError("Unexpected memory ordering for AtomicRMW");
3027 Dest, static_cast<uint32_t>(
3028 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
3029 Instr->getArg(1), Instr->getArg(2));
3032 case Intrinsics::Bswap: {
3033 Operand *Val = Instr->getArg(0);
3034 Type Ty = Val->getType();
3035 if (Ty == IceType_i64) {
3036 Val = legalizeUndef(Val);
3037 Variable *Val_Lo = legalizeToReg(loOperand(Val));
3038 Variable *Val_Hi = legalizeToReg(hiOperand(Val));
3039 Variable *T_Lo = makeReg(IceType_i32);
3040 Variable *T_Hi = makeReg(IceType_i32);
3041 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3042 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3048 assert(Ty == IceType_i32 || Ty == IceType_i16);
3049 Variable *ValR = legalizeToReg(Val);
3050 Variable *T = makeReg(Ty);
3052 if (Val->getType() == IceType_i16) {
3054 legalize(Ctx->getConstantInt32(16), Legal_Reg | Legal_Flex);
3055 _lsr(T, T, Sixteen);
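// Worked example (illustrative): for the i16 value 0x1234, the 32-bit byte
// reverse yields 0x34120000, and the lsr #16 leaves 0x3412, the swapped
// halfword.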
3061 case Intrinsics::Ctpop: {
3062 Operand *Val = Instr->getArg(0);
3063 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType())
3069 // The popcount helpers always return 32-bit values, while the intrinsic's
3070 // signature matches some 64-bit platforms' native instructions and expects
3071 // to fill a 64-bit reg. Thus, clear the upper bits of the dest just in
3072 // case the user doesn't do that in the IR or doesn't toss the bits via truncation.
3074 if (Val->getType() == IceType_i64) {
3075 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3076 Constant *Zero = Ctx->getConstantZero(IceType_i32);
3077 Variable *T = makeReg(Zero->getType());
3083 case Intrinsics::Ctlz: {
3084 // The "is zero undef" parameter is ignored and we always return a
3085 // well-defined value.
3086 Operand *Val = Instr->getArg(0);
3088 Variable *ValHiR = nullptr;
3089 if (Val->getType() == IceType_i64) {
3090 Val = legalizeUndef(Val);
3091 ValLoR = legalizeToReg(loOperand(Val));
3092 ValHiR = legalizeToReg(hiOperand(Val));
3094 ValLoR = legalizeToReg(Val);
3096 lowerCLZ(Dest, ValLoR, ValHiR);
3099 case Intrinsics::Cttz: {
3100 // Essentially like Clz, but reverse the bits first.
3101 Operand *Val = Instr->getArg(0);
3103 Variable *ValHiR = nullptr;
3104 if (Val->getType() == IceType_i64) {
3105 Val = legalizeUndef(Val);
3106 ValLoR = legalizeToReg(loOperand(Val));
3107 ValHiR = legalizeToReg(hiOperand(Val));
3108 Variable *TLo = makeReg(IceType_i32);
3109 Variable *THi = makeReg(IceType_i32);
3115 ValLoR = legalizeToReg(Val);
3116 Variable *T = makeReg(IceType_i32);
3120 lowerCLZ(Dest, ValLoR, ValHiR);
3123 case Intrinsics::Fabs: {
3124 Type DestTy = Dest->getType();
3125 Variable *T = makeReg(DestTy);
3126 if (isVectorType(DestTy)) {
3127 // Add a fake def to keep liveness consistent in the meantime.
3128 Context.insert(InstFakeDef::create(Func, T));
3130 UnimplementedError(Func->getContext()->getFlags());
3133 _vabs(T, legalizeToReg(Instr->getArg(0)));
3137 case Intrinsics::Longjmp: {
3138 InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2);
3139 Call->addArg(Instr->getArg(0));
3140 Call->addArg(Instr->getArg(1));
3144 case Intrinsics::Memcpy: {
3145 // In the future, we could potentially emit an inline memcpy/memset, etc.
3146 // for intrinsic calls w/ a known length.
3147 InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3);
3148 Call->addArg(Instr->getArg(0));
3149 Call->addArg(Instr->getArg(1));
3150 Call->addArg(Instr->getArg(2));
3154 case Intrinsics::Memmove: {
3155 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3);
3156 Call->addArg(Instr->getArg(0));
3157 Call->addArg(Instr->getArg(1));
3158 Call->addArg(Instr->getArg(2));
3162 case Intrinsics::Memset: {
3163 // The value operand needs to be extended to a stack slot size because the
3164 // PNaCl ABI requires arguments to be at least 32 bits wide.
3165 Operand *ValOp = Instr->getArg(1);
3166 assert(ValOp->getType() == IceType_i8);
3167 Variable *ValExt = Func->makeVariable(stackSlotType());
3168 lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
3169 // Technically, ARM has its own __aeabi_memset, but we can use plain
3170 // memset too. The value and size arguments need to be flipped if we ever
3171 // decide to use __aeabi_memset.
3172 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
3173 Call->addArg(Instr->getArg(0));
3174 Call->addArg(ValExt);
3175 Call->addArg(Instr->getArg(2));
3179 case Intrinsics::NaClReadTP: {
3180 if (Ctx->getFlags().getUseSandboxing()) {
3181 UnimplementedError(Func->getContext()->getFlags());
3183 InstCall *Call = makeHelperCall(H_call_read_tp, Dest, 0);
3188 case Intrinsics::Setjmp: {
3189 InstCall *Call = makeHelperCall(H_call_setjmp, Dest, 1);
3190 Call->addArg(Instr->getArg(0));
3194 case Intrinsics::Sqrt: {
3195 Variable *Src = legalizeToReg(Instr->getArg(0));
3196 Variable *T = makeReg(Dest->getType());
3201 case Intrinsics::Stacksave: {
3202 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
3206 case Intrinsics::Stackrestore: {
3207 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
3208 Operand *Val = legalize(Instr->getArg(0), Legal_Reg | Legal_Flex);
3209 _mov_redefined(SP, Val);
3212 case Intrinsics::Trap:
3215 case Intrinsics::UnknownIntrinsic:
3216 Func->setError("Should not be lowering UnknownIntrinsic");
3222 void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) {
3223 Type Ty = Dest->getType();
3224 assert(Ty == IceType_i32 || Ty == IceType_i64);
3225 Variable *T = makeReg(IceType_i32);
3227 if (Ty == IceType_i64) {
3228 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3229 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3231 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
3232 Operand *ThirtyTwo =
3233 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
3235 Variable *T2 = makeReg(IceType_i32);
3236 _add(T2, T, ThirtyTwo);
3237 _clz(T2, ValHiR, CondARM32::NE);
3238 // T2 is actually a source as well when the predicate is not AL (since it
3239 // may leave T2 alone). We use _set_dest_redefined to prolong the liveness
3240 // of T2 as if it were used as a source.
3241 _set_dest_redefined();
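// Worked sketch (illustrative): clz64(x) is clz32(x.hi) when x.hi != 0, and
// 32 + clz32(x.lo) otherwise. E.g., x.hi == 0 and x.lo == 0x0000FFFF gives
// 32 + 16 = 48 leading zeros.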
3243 Variable *T3 = makeReg(Zero->getType());
3252 void TargetARM32::lowerLoad(const InstLoad *Load) {
3253 // A Load instruction can be treated the same as an Assign instruction, after
3254 // the source operand is transformed into an OperandARM32Mem operand.
3255 Type Ty = Load->getDest()->getType();
3256 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
3257 Variable *DestLoad = Load->getDest();
3259 // TODO(jvoung): handle folding opportunities. Sign and zero extension can
3260 // be folded into a load.
3261 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
3262 lowerAssign(Assign);
3265 void TargetARM32::doAddressOptLoad() {}
3267 void TargetARM32::randomlyInsertNop(float Probability,
3268 RandomNumberGenerator &RNG) {
3269 RandomNumberGeneratorWrapper RNGW(RNG);
3270 if (RNGW.getTrueWithProbability(Probability)) {
3271 UnimplementedError(Func->getContext()->getFlags());
3275 void TargetARM32::lowerPhi(const InstPhi * /*Inst*/) {
3276 Func->setError("Phi found in regular instruction list");
3279 void TargetARM32::lowerRet(const InstRet *Inst) {
3280 Variable *Reg = nullptr;
3281 if (Inst->hasRetValue()) {
3282 Operand *Src0 = Inst->getRetValue();
3283 Type Ty = Src0->getType();
3284 if (Ty == IceType_i64) {
3285 Src0 = legalizeUndef(Src0);
3286 Variable *R0 = legalizeToReg(loOperand(Src0), RegARM32::Reg_r0);
3287 Variable *R1 = legalizeToReg(hiOperand(Src0), RegARM32::Reg_r1);
3289 Context.insert(InstFakeUse::create(Func, R1));
3290 } else if (Ty == IceType_f32) {
3291 Variable *S0 = legalizeToReg(Src0, RegARM32::Reg_s0);
3293 } else if (Ty == IceType_f64) {
3294 Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0);
3296 } else if (isVectorType(Src0->getType())) {
3297 Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0);
3300 Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
3301 Reg = makeReg(Src0F->getType(), RegARM32::Reg_r0);
3302 _mov(Reg, Src0F, CondARM32::AL);
3305 // Add a ret instruction even if sandboxing is enabled, because addEpilog
3306 // explicitly looks for a ret instruction as a marker for where to insert the
3307 // frame removal instructions. addEpilog is responsible for restoring the
3308 // "lr" register as needed prior to this ret instruction.
3309 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
3310 // Add a fake use of sp to make sure sp stays alive for the entire function.
3311 // Otherwise post-call sp adjustments get dead-code eliminated.
3312 // TODO: Are there more places where the fake use should be inserted? E.g.
3313 // "void f(int n){while(1) g(n);}" may not have a ret instruction.
3314 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
3315 Context.insert(InstFakeUse::create(Func, SP));
3318 void TargetARM32::lowerSelect(const InstSelect *Inst) {
3319 Variable *Dest = Inst->getDest();
3320 Type DestTy = Dest->getType();
3321 Operand *SrcT = Inst->getTrueOperand();
3322 Operand *SrcF = Inst->getFalseOperand();
3323 Operand *Condition = Inst->getCondition();
3325 if (isVectorType(DestTy)) {
3326 Variable *T = makeReg(DestTy);
3327 Context.insert(InstFakeDef::create(Func, T));
3329 UnimplementedError(Func->getContext()->getFlags());
3332 // TODO(jvoung): handle folding opportunities.
3333 // cmp cond, #0; mov t, SrcF; mov_cond t, SrcT; mov dest, t
3334 Variable *CmpOpnd0 = legalizeToReg(Condition);
3335 Type CmpOpnd0Ty = CmpOpnd0->getType();
3336 Operand *CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
3337 assert(CmpOpnd0Ty == IceType_i1);
3338 if (CmpOpnd0Ty != IceType_i32)
3339 _uxt(CmpOpnd0, CmpOpnd0);
3340 _cmp(CmpOpnd0, CmpOpnd1);
3341 static constexpr CondARM32::Cond Cond = CondARM32::NE;
3342 if (DestTy == IceType_i64) {
3343 SrcT = legalizeUndef(SrcT);
3344 SrcF = legalizeUndef(SrcF);
3345 // Set the low portion.
3346 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3347 Operand *SrcFLo = legalize(loOperand(SrcF), Legal_Reg | Legal_Flex);
3348 Variable *TLo = makeReg(SrcFLo->getType());
3350 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Flex);
3351 _mov_redefined(TLo, SrcTLo, Cond);
3353 // Set the high portion.
3354 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3355 Operand *SrcFHi = legalize(hiOperand(SrcF), Legal_Reg | Legal_Flex);
3356 Variable *THi = makeReg(SrcFHi->getType());
3358 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Flex);
3359 _mov_redefined(THi, SrcTHi, Cond);
3364 if (isFloatingType(DestTy)) {
3365 Variable *T = makeReg(DestTy);
3366 SrcF = legalizeToReg(SrcF);
3367 assert(DestTy == SrcF->getType());
3369 SrcT = legalizeToReg(SrcT);
3370 assert(DestTy == SrcT->getType());
3371 _mov(T, SrcT, Cond);
3372 _set_dest_redefined();
3377 SrcF = legalize(SrcF, Legal_Reg | Legal_Flex);
3378 Variable *T = makeReg(SrcF->getType());
3380 SrcT = legalize(SrcT, Legal_Reg | Legal_Flex);
3381 _mov_redefined(T, SrcT, Cond);
3385 void TargetARM32::lowerStore(const InstStore *Inst) {
3386 Operand *Value = Inst->getData();
3387 Operand *Addr = Inst->getAddr();
3388 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
3389 Type Ty = NewAddr->getType();
3391 if (Ty == IceType_i64) {
3392 Value = legalizeUndef(Value);
3393 Variable *ValueHi = legalizeToReg(hiOperand(Value));
3394 Variable *ValueLo = legalizeToReg(loOperand(Value));
3395 _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr)));
3396 _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr)));
3398 Variable *ValueR = legalizeToReg(Value);
3399 _str(ValueR, NewAddr);
3403 void TargetARM32::doAddressOptStore() {}
3405 void TargetARM32::lowerSwitch(const InstSwitch *Inst) {
3406 // This implements the most naive possible lowering.
3407 // cmp a,val[0]; beq label[0]; cmp a,val[1]; beq label[1]; ... b default
3408 Operand *Src0 = Inst->getComparison();
3409 SizeT NumCases = Inst->getNumCases();
3410 if (Src0->getType() == IceType_i64) {
3411 Src0 = legalizeUndef(Src0);
3412 Variable *Src0Lo = legalizeToReg(loOperand(Src0));
3413 Variable *Src0Hi = legalizeToReg(hiOperand(Src0));
3414 for (SizeT I = 0; I < NumCases; ++I) {
3415 Operand *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
3416 Operand *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
3417 ValueLo = legalize(ValueLo, Legal_Reg | Legal_Flex);
3418 ValueHi = legalize(ValueHi, Legal_Reg | Legal_Flex);
3419 _cmp(Src0Lo, ValueLo);
3420 _cmp(Src0Hi, ValueHi, CondARM32::EQ);
3421 _br(Inst->getLabel(I), CondARM32::EQ);
3423 _br(Inst->getLabelDefault());
3427 Variable *Src0Var = legalizeToReg(Src0);
3428 // If Src0 is not an i32, we left shift it -- see the icmp lowering for the rationale.
3430 assert(Src0Var->mustHaveReg());
3431 const size_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
3432 assert(ShiftAmt < 32);
3434 Operand *ShiftConst = Ctx->getConstantInt32(ShiftAmt);
3435 Variable *T = makeReg(IceType_i32);
3436 _lsl(T, Src0Var, ShiftConst);
3440 for (SizeT I = 0; I < NumCases; ++I) {
3441 Operand *Value = Ctx->getConstantInt32(Inst->getValue(I) << ShiftAmt);
3442 Value = legalize(Value, Legal_Reg | Legal_Flex);
3443 _cmp(Src0Var, Value);
3444 _br(Inst->getLabel(I), CondARM32::EQ);
3446 _br(Inst->getLabelDefault());
3449 void TargetARM32::lowerUnreachable(const InstUnreachable * /*Inst*/) {
3453 void TargetARM32::prelowerPhis() {
3454 PhiLowering::prelowerPhis32Bit<TargetARM32>(this, Context.getNode(), Func);
3457 Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) {
3458 Variable *Reg = makeReg(Ty, RegNum);
3459 Context.insert(InstFakeDef::create(Func, Reg));
3460 UnimplementedError(Func->getContext()->getFlags());
3464 // Helper for legalize() to emit the right code to lower an operand to a
3465 // register of the appropriate type.
3466 Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) {
3467 Type Ty = Src->getType();
3468 Variable *Reg = makeReg(Ty, RegNum);
3473 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
3475 Type Ty = From->getType();
3476 // Assert that a physical register is allowed. To date, all calls to
3477 // legalize() allow a physical register. Legal_Flex converts registers into
3478 // OperandARM32FlexReg operands of the right type as needed.
3479 assert(Allowed & Legal_Reg);
3480 // Go through the various types of operands: OperandARM32Mem,
3481 // OperandARM32Flex, Constant, and Variable. Given the above assertion, if
3482 // the type of operand is not legal (e.g., OperandARM32Mem and !Legal_Mem), we
3483 // can always copy to a register.
3484 if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
3485 static const struct {
3489 #define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr) \
3490 { (ubits) > 0, rraddr } \
3495 // Before doing anything with a Mem operand, we need to ensure that the
3496 // Base and Index components are in physical registers.
3497 Variable *Base = Mem->getBase();
3498 Variable *Index = Mem->getIndex();
3499 ConstantInteger32 *Offset = Mem->getOffset();
3500 assert(Index == nullptr || Offset == nullptr);
3501 Variable *RegBase = nullptr;
3502 Variable *RegIndex = nullptr;
3504 RegBase = legalizeToReg(Base);
3507 RegIndex = legalizeToReg(Index);
3508 if (!MemTraits[Ty].CanHaveIndex) {
3509 Variable *T = makeReg(IceType_i32, getReservedTmpReg());
3510 _add(T, RegBase, RegIndex);
3515 if (Offset && Offset->getValue() != 0) {
3516 static constexpr bool SignExt = false;
3517 if (!MemTraits[Ty].CanHaveOffset ||
3518 !OperandARM32Mem::canHoldOffset(Ty, SignExt, Offset->getValue())) {
3519 Variable *T = legalizeToReg(Offset, getReservedTmpReg());
3520 _add(T, T, RegBase);
3522 Offset = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(0));
3526 // Create a new operand if there was a change.
3527 if (Base != RegBase || Index != RegIndex) {
3528 // There is only a reg +/- reg or reg + imm form.
3529 // Figure out which to re-create.
3530 if (RegBase && RegIndex) {
3531 Mem = OperandARM32Mem::create(Func, Ty, RegBase, RegIndex,
3532 Mem->getShiftOp(), Mem->getShiftAmt(),
3533 Mem->getAddrMode());
3535 Mem = OperandARM32Mem::create(Func, Ty, RegBase, Offset,
3536 Mem->getAddrMode());
3539 if (Allowed & Legal_Mem) {
3542 Variable *Reg = makeReg(Ty, RegNum);
3549 if (auto Flex = llvm::dyn_cast<OperandARM32Flex>(From)) {
3550 if (!(Allowed & Legal_Flex)) {
3551 if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
3552 if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
3553 From = FlexReg->getReg();
3554 // Fall through and let From be checked as a Variable below, where it
3555 // may or may not need a register.
3557 return copyToReg(Flex, RegNum);
3560 return copyToReg(Flex, RegNum);
3567 if (llvm::isa<Constant>(From)) {
3568 if (llvm::isa<ConstantUndef>(From)) {
3569 From = legalizeUndef(From, RegNum);
3570 if (isVectorType(Ty))
3573 // There should be no constants of vector type (other than undef).
3574 assert(!isVectorType(Ty));
3575 bool CanBeFlex = Allowed & Legal_Flex;
3576 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
3579 uint32_t Value = static_cast<uint32_t>(C32->getValue());
3580 // Check if the immediate will fit in a Flexible second operand, if a
3581 // Flexible second operand is allowed. We need to know the exact value,
3582 // so that rules out relocatable constants. Also try the inverse and use mvn if that fits.
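// For example (illustrative): 0x00AB0000 encodes directly as a rotated 8-bit
// immediate, while 0xFFFFFF00 does not -- but its inverse 0x000000FF does,
// so "mvn reg, #0xFF" materializes it in a single instruction.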
3585 OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
3586 return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
3587 } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm(
3588 ~Value, &RotateAmt, &Immed_8)) {
3590 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
3591 Variable *Reg = makeReg(Ty, RegNum);
3592 _mvn(Reg, InvertedFlex);
3595 // Do a movw/movt to a register.
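// Sketch of the pair (illustrative), materializing 0x12345678:
//   movw reg, #0x5678   @ writes the low halfword and clears the high one
//   movt reg, #0x1234   @ writes the high halfword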
3596 Variable *Reg = makeReg(Ty, RegNum);
3597 uint32_t UpperBits = (Value >> 16) & 0xFFFF;
3599 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
3600 if (UpperBits != 0) {
3601 _movt(Reg, Ctx->getConstantInt32(UpperBits));
    } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
      Variable *Reg = makeReg(Ty, RegNum);
      _movw(Reg, C);
      _movt(Reg, C);
      return Reg;
    } else {
      assert(isScalarFloatingType(Ty));
      // Load floats/doubles from the literal pool.
      // TODO(jvoung): Allow certain immediates to be encoded directly in an
      // operand. See Table A7-18 of the ARM manual: "Floating-point modified
      // immediate constants". Or, for 32-bit floating point numbers, just
      // encode the raw bits into a movw/movt pair to a GPR, and vmov to an
      // SREG, instead of using a movw/movt pair to get the const-pool address
      // and then loading.
      std::string Buffer;
      llvm::raw_string_ostream StrBuf(Buffer);
      llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx);
      llvm::cast<Constant>(From)->setShouldBePooled(true);
      Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
      Variable *BaseReg = makeReg(getPointerType());
      _movw(BaseReg, Offset);
      _movt(BaseReg, Offset);
      From = formMemoryOperand(BaseReg, Ty);
      return copyToReg(From, RegNum);
    }
  }
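
  // The sequence emitted for a pooled float/double is therefore roughly (a
  // sketch; the actual pool label name is produced by emitPoolLabel):
  //   movw rN, #:lower16:.L$f32$0
  //   movt rN, #:upper16:.L$f32$0
  //   <load of [rN] into the destination FP register>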

  if (auto *Var = llvm::dyn_cast<Variable>(From)) {
    // Check if the variable is guaranteed a physical register. This can happen
    // either when the variable is pre-colored or when it is assigned infinite
    // weight.
    bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }

  llvm_unreachable("Unhandled operand kind in legalize()");
  return From;
}

/// Provide a trivial wrapper to legalize() for this common usage.
Variable *TargetARM32::legalizeToReg(Operand *From, int32_t RegNum) {
  return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
}

/// Legalize undef values to concrete values.
Operand *TargetARM32::legalizeUndef(Operand *From, int32_t RegNum) {
  Type Ty = From->getType();
  if (llvm::isa<ConstantUndef>(From)) {
    // Lower undefs to zero. Another option is to lower undefs to an
    // uninitialized register; however, using an uninitialized register results
    // in less predictable code.
    //
    // If in the future the implementation is changed to lower undef values to
    // uninitialized registers, a FakeDef will be needed:
    //   Context.insert(InstFakeDef::create(Func, Reg));
    // This is in order to ensure that the live range of Reg is not
    // overestimated. If the constant being lowered is a 64 bit value, then the
    // result should be split and the lo and hi components will need to go in
    // uninitialized registers.
    if (isVectorType(Ty))
      return makeVectorOfZeros(Ty, RegNum);
    return Ctx->getConstantZero(Ty);
  }
  return From;
}

OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
  OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
  // It may be the case that address mode optimization already creates an
  // OperandARM32Mem, so in that case it wouldn't need another level of
  // transformation.
  if (Mem) {
    return llvm::cast<OperandARM32Mem>(legalize(Mem));
  }
  // If we didn't do address mode optimization, then we only have a base/offset
  // to work with. ARM always requires a base register, so just use that to
  // hold the operand.
  Variable *Base = legalizeToReg(Operand);
  return OperandARM32Mem::create(
      Func, Ty, Base,
      llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
}
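
// For example, for a pointer already legalized into r0, the fallback path
// above produces the addressing form [r0, #0], i.e. a plain base register
// with a zero immediate offset.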

Variable64On32 *TargetARM32::makeI64RegPair() {
  Variable64On32 *Reg =
      llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
  Reg->setMustHaveReg();
  Reg->initHiLo(Func);
  Reg->getLo()->setMustNotHaveReg();
  Reg->getHi()->setMustNotHaveReg();
  return Reg;
}
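
// A note on the flags above: forcing the pair into a register while keeping
// its lo/hi halves out of registers makes the allocator assign the 64-bit
// value as a single unit, the shape needed by instructions that consume a
// consecutive register pair (e.g. ldrexd/strexd; assumed usage, not asserted
// here).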

Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
  // There aren't any 64-bit integer registers for ARM32.
  assert(Type != IceType_i64);
  Variable *Reg = Func->makeVariable(Type);
  if (RegNum == Variable::NoRegister)
    Reg->setMustHaveReg();
  else
    Reg->setRegNum(RegNum);
  return Reg;
}

void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) {
  assert(llvm::isPowerOf2_32(Align));
  uint32_t RotateAmt;
  uint32_t Immed_8;
  Operand *Mask;
  // Use AND or BIC to mask off the bits, depending on which immediate fits (if
  // it fits at all). Assume Align is usually small, in which case BIC works
  // better. Thus, this rounds down to the alignment.
  if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
    Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex);
    _bic(Reg, Reg, Mask);
  } else {
    Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex);
    _and(Reg, Reg, Mask);
  }
}
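
// For example, Align == 16 gives "bic rN, rN, #15", clearing the low four
// bits. If Align - 1 does not fit a flexible immediate, the -Align mask is
// tried with AND instead, and legalize() puts it in a register if even that
// fails to encode.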

void TargetARM32::postLower() {
  if (Ctx->getFlags().getOptLevel() == Opt_m1)
    return;
  markRedefinitions();
}

void TargetARM32::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<int32_t> &Permutation,
    const llvm::SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
  (void)Permutation;
  (void)ExcludeRegisters;
  (void)Salt;
  UnimplementedError(Func->getContext()->getFlags());
}

void TargetARM32::emit(const ConstantInteger32 *C) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  Str << getConstantPrefix() << C->getValue();
}

void TargetARM32::emit(const ConstantInteger64 *) const {
  llvm::report_fatal_error("Not expecting to emit 64-bit integers");
}

void TargetARM32::emit(const ConstantFloat *C) const {
  (void)C;
  UnimplementedError(Ctx->getFlags());
}

void TargetARM32::emit(const ConstantDouble *C) const {
  (void)C;
  UnimplementedError(Ctx->getFlags());
}

void TargetARM32::emit(const ConstantUndef *) const {
  llvm::report_fatal_error("undef value encountered by emitter.");
}

TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)
    : TargetDataLowering(Ctx) {}

void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars,
                                   const IceString &SectionSuffix) {
  switch (Ctx->getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();
    OstreamLocker L(Ctx);
    for (const VariableDeclaration *Var : Vars) {
      if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
        emitGlobal(*Var, SectionSuffix);
      }
    }
  } break;
  }
}

namespace {

template <typename T> struct ConstantPoolEmitterTraits;

static_assert(sizeof(uint64_t) == 8,
              "uint64_t is supposed to be 8 bytes wide.");

// TODO(jpp): implement the following when implementing constant randomization:
//  * template <> struct ConstantPoolEmitterTraits<uint8_t>
//  * template <> struct ConstantPoolEmitterTraits<uint16_t>
//  * template <> struct ConstantPoolEmitterTraits<uint32_t>
template <> struct ConstantPoolEmitterTraits<float> {
  using ConstantType = ConstantFloat;
  static constexpr Type IceType = IceType_f32;
  // AsmTag and TypeName can't be constexpr because llvm::StringRef is unhappy
  // about them being constexpr.
  static const char AsmTag[];
  static const char TypeName[];
  static uint64_t bitcastToUint64(float Value) {
    static_assert(sizeof(Value) == sizeof(uint32_t),
                  "Float should be 4 bytes.");
    // Use llvm::FloatToBits for the bit copy, rather than reinterpret_cast'ing
    // a float* to a uint32_t*, which violates strict aliasing.
    return static_cast<uint64_t>(llvm::FloatToBits(Value));
  }
};
const char ConstantPoolEmitterTraits<float>::AsmTag[] = ".long";
const char ConstantPoolEmitterTraits<float>::TypeName[] = "f32";

template <> struct ConstantPoolEmitterTraits<double> {
  using ConstantType = ConstantDouble;
  static constexpr Type IceType = IceType_f64;
  static const char AsmTag[];
  static const char TypeName[];
  static uint64_t bitcastToUint64(double Value) {
    static_assert(sizeof(double) == sizeof(uint64_t),
                  "Double should be 8 bytes.");
    // As above, use llvm::DoubleToBits instead of a strict-aliasing-violating
    // reinterpret_cast.
    return llvm::DoubleToBits(Value);
  }
};
const char ConstantPoolEmitterTraits<double>::AsmTag[] = ".quad";
const char ConstantPoolEmitterTraits<double>::TypeName[] = "f64";

template <typename T>
void emitConstant(
    Ostream &Str, const GlobalContext *Ctx,
    const typename ConstantPoolEmitterTraits<T>::ConstantType *Const) {
  using Traits = ConstantPoolEmitterTraits<T>;
  Const->emitPoolLabel(Str, Ctx);
  Str << ":\n\t" << Traits::AsmTag << "\t0x";
  T Value = Const->getValue();
  Str.write_hex(Traits::bitcastToUint64(Value));
  Str << "\t@" << Traits::TypeName << " " << Value << "\n";
}
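
// A pooled f32 with value 1.0 would be printed roughly as (a sketch; the
// label comes from emitPoolLabel):
//   .L$f32$0:
//           .long   0x3f800000      @f32 1.000000e+00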

template <typename T> void emitConstantPool(GlobalContext *Ctx) {
  if (!BuildDefs::dump()) {
    return;
  }

  using Traits = ConstantPoolEmitterTraits<T>;
  static constexpr size_t MinimumAlignment = 4;
  SizeT Align = std::max(MinimumAlignment, typeAlignInBytes(Traits::IceType));
  assert((Align % 4) == 0 && "Constants should be aligned");
  Ostream &Str = Ctx->getStrEmit();
  ConstantList Pool = Ctx->getConstantPool(Traits::IceType);

  Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",%progbits," << Align
      << "\n"
      << "\t.align\t" << Align << "\n";

  if (Ctx->getFlags().shouldReorderPooledConstants()) {
    // TODO(jpp): add constant pooling.
    UnimplementedError(Ctx->getFlags());
  }

  for (Constant *C : Pool) {
    if (!C->getShouldBePooled()) {
      continue;
    }

    emitConstant<T>(Str, Ctx, llvm::dyn_cast<typename Traits::ConstantType>(C));
  }
}
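
// For f64 constants this emits a section header along the lines of (sketch):
//   .section .rodata.cst8,"aM",%progbits,8
//   .align  8
// followed by one label/.quad entry per pooled constant.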

} // end of anonymous namespace

void TargetDataARM32::lowerConstants() {
  if (Ctx->getFlags().getDisableTranslation())
    return;
  switch (Ctx->getFlags().getOutFileType()) {
  case FT_Elf:
    UnimplementedError(Ctx->getFlags());
    break;
  case FT_Asm:
  case FT_Iasm: {
    OstreamLocker L(Ctx);
    emitConstantPool<float>(Ctx);
    emitConstantPool<double>(Ctx);
  } break;
  }
}

void TargetDataARM32::lowerJumpTables() {
  if (Ctx->getFlags().getDisableTranslation())
    return;
  switch (Ctx->getFlags().getOutFileType()) {
  case FT_Elf:
    UnimplementedError(Ctx->getFlags());
    break;
  case FT_Asm:
    // Already emitted from Cfg.
    break;
  case FT_Iasm:
    // TODO(kschimpf): Fill this in when we get more information.
    break;
  }
}

TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
    : TargetHeaderLowering(Ctx), CPUFeatures(Ctx->getFlags()) {}

void TargetHeaderARM32::lower() {
  OstreamLocker L(Ctx);
  Ostream &Str = Ctx->getStrEmit();
  Str << ".syntax unified\n";
  // Emit build attributes in format: .eabi_attribute TAG, VALUE. See Sec. 2 of
  // "Addenda to, and Errata in the ABI for the ARM architecture"
  // http://infocenter.arm.com
  // /help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
  //
  // Tag_conformance should be emitted first in a file-scope sub-subsection of
  // the first public subsection of the attributes.
  Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n";
  // Chromebooks are at least A15, but use A9 for broader compatibility. For
  // some reason, the LLVM ARM asm parser has the .cpu directive override the
  // mattr specified on the commandline. So to test hwdiv, we need to set the
  // .cpu directive higher (can't just rely on --mattr=...).
  if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
    Str << ".cpu cortex-a15\n";
  } else {
    Str << ".cpu cortex-a9\n";
  }
  Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n"
      << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n";
  Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n"
      << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n";
  Str << ".fpu neon\n"
      << ".eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use: permit directly\n"
      << ".eabi_attribute 20, 1 @ Tag_ABI_FP_denormal\n"
      << ".eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions\n"
      << ".eabi_attribute 23, 3 @ Tag_ABI_FP_number_model: IEEE 754\n"
      << ".eabi_attribute 34, 1 @ Tag_CPU_unaligned_access\n"
      << ".eabi_attribute 24, 1 @ Tag_ABI_align_needed: 8-byte\n"
      << ".eabi_attribute 25, 1 @ Tag_ABI_align_preserved: 8-byte\n"
      << ".eabi_attribute 28, 1 @ Tag_ABI_VFP_args\n"
      << ".eabi_attribute 36, 1 @ Tag_FP_HP_extension\n"
      << ".eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format\n"
      << ".eabi_attribute 42, 1 @ Tag_MPextension_use\n"
      << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
  if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
    Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";
  }
  // Technically R9 is used for TLS with Sandboxing, and we reserve it.
  // However, for compatibility with current NaCl LLVM, don't claim that.
  Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
}

} // end of namespace Ice