if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var))
Var64On32->initHiLo(this);
- processAllocas();
-
// The set of translation passes and their order are determined by the
// target.
getTarget()->translate();
getTarget()->lowerArguments();
}
-void Cfg::sortAllocas(CfgVector<Inst *> &Allocas, InstList &Insts,
- bool IsKnownFrameOffset) {
+// Sort the given constant-size entry-block allocas by decreasing alignment
+// and fuse them into one combined alloca. Each original alloca's dest is
+// rewritten as an offset from a shared base variable (or marked
+// rematerializable against the stack pointer), and the original alloca is
+// deleted.
+void Cfg::sortAndCombineAllocas(CfgVector<Inst *> &Allocas,
+ uint32_t CombinedAlignment, InstList &Insts,
+ AllocaBaseVariableType BaseVariableType) {
if (Allocas.empty())
return;
- // Sort by decreasing alignment. This does not really matter at the moment,
- // but will allow compacting stack allocation when we fuse to one alloca.
+ // Sort by decreasing alignment.
std::sort(Allocas.begin(), Allocas.end(), [](Inst *I1, Inst *I2) {
auto *A1 = llvm::dyn_cast<InstAlloca>(I1);
auto *A2 = llvm::dyn_cast<InstAlloca>(I2);
return A1->getAlignInBytes() > A2->getAlignInBytes();
});
+ // Process the allocas in order of decreasing stack alignment. This allows
+ // us to pack less-aligned pieces after more-aligned ones, resulting in less
+ // stack growth. It also allows there to be at most one stack alignment "and"
+ // instruction for a whole list of allocas.
+ uint32_t CurrentOffset = 0;
+ CfgVector<int32_t> Offsets;
for (Inst *Instr : Allocas) {
auto *Alloca = llvm::cast<InstAlloca>(Instr);
- // Move the alloca to its sorted position.
- InstAlloca *NewAlloca =
- InstAlloca::create(this, Alloca->getSizeInBytes(),
- Alloca->getAlignInBytes(), Alloca->getDest());
- if (IsKnownFrameOffset)
- NewAlloca->setKnownFrameOffset();
- Insts.push_front(NewAlloca);
+ // Adjust the size of the allocation up to the next multiple of the
+ // object's alignment.
+ uint32_t Alignment = std::max(Alloca->getAlignInBytes(), 1u);
+ auto *ConstSize =
+ llvm::dyn_cast<ConstantInteger32>(Alloca->getSizeInBytes());
+ // NOTE(review): ConstSize is not null-checked; this relies on the caller
+ // passing only allocas whose size operand is a ConstantInteger32.
+ uint32_t Size = Utils::applyAlignment(ConstSize->getValue(), Alignment);
+ if (BaseVariableType == BVT_FramePointer) {
+ // Addressing is relative to the frame pointer. Subtract the offset after
+ // adding the size of the alloca, because it grows downwards from the
+ // frame pointer.
+ Offsets.push_back(-(CurrentOffset + Size));
+ } else {
+ // Addressing is relative to the stack pointer or to a user pointer. Add
+ // the offset before adding the size of the object, because it grows
+ // upwards from the stack pointer.
+ Offsets.push_back(CurrentOffset);
+ }
+ // Update the running offset of the fused alloca region.
+ CurrentOffset += Size;
+ }
+ // Round the offset up to the alignment granularity to use as the size.
+ uint32_t TotalSize = Utils::applyAlignment(CurrentOffset, CombinedAlignment);
+ // Ensure every alloca was assigned an offset.
+ assert(Allocas.size() == Offsets.size());
+ Variable *BaseVariable = makeVariable(IceType_i32);
+ Variable *AllocaDest = BaseVariable;
+ // Rewrite each alloca's dest in terms of the base variable (or, for the
+ // stack-pointer case, as a rematerializable variable), then delete it.
+ for (size_t i = 0; i < Allocas.size(); ++i) {
+ auto *Alloca = llvm::cast<InstAlloca>(Allocas[i]);
+ switch (BaseVariableType) {
+ case BVT_FramePointer:
+ case BVT_UserPointer: {
+ // Emit a new addition operation to replace the alloca.
+ Operand *AllocaOffset = Ctx->getConstantInt32(Offsets[i]);
+ InstArithmetic *Add =
+ InstArithmetic::create(this, InstArithmetic::Add, Alloca->getDest(),
+ BaseVariable, AllocaOffset);
+ Insts.push_front(Add);
+ } break;
+ case BVT_StackPointer: {
+ // Emit a fake definition of the rematerializable variable.
+ Variable *Dest = Alloca->getDest();
+ InstFakeDef *Def = InstFakeDef::create(this, Dest);
+ Dest->setRematerializable(getTarget()->getStackReg(), Offsets[i]);
+ Insts.push_front(Def);
+ } break;
+ }
Alloca->setDeleted();
}
+ Operand *AllocaSize = Ctx->getConstantInt32(TotalSize);
+ switch (BaseVariableType) {
+ case BVT_FramePointer: {
+ // Adjust the return of the alloca to the top of the returned region.
+ // BaseVariable = AllocaDest + TotalSize: the region grows down from the
+ // frame pointer, so the negative Offsets computed above are relative to
+ // its top.
+ AllocaDest = makeVariable(IceType_i32);
+ InstArithmetic *Add = InstArithmetic::create(
+ this, InstArithmetic::Add, BaseVariable, AllocaDest, AllocaSize);
+ Insts.push_front(Add);
+ } break;
+ case BVT_StackPointer: {
+ // Emit a fake use to keep the Alloca live.
+ InstFakeUse *Use = InstFakeUse::create(this, AllocaDest);
+ Insts.push_front(Use);
+ } break;
+ case BVT_UserPointer:
+ break;
+ }
+ // And insert the fused alloca.
+ // Because instructions are push_front'ed, the combined alloca ends up
+ // first in the block, before the base adjustment and the per-alloca
+ // rewrites emitted above.
+ InstAlloca *CombinedAlloca =
+ InstAlloca::create(this, AllocaSize, CombinedAlignment, AllocaDest);
+ CombinedAlloca->setKnownFrameOffset();
+ Insts.push_front(CombinedAlloca);
}
-void Cfg::processAllocas() {
+// Scan allocas to decide whether a frame pointer is required (large-alignment
+// or dynamic allocations). When SortAndCombine is true, also partition the
+// constant-size entry-block allocas into fixed vs. over-aligned groups and
+// fuse each group via sortAndCombineAllocas().
+void Cfg::processAllocas(bool SortAndCombine) {
const uint32_t StackAlignment = getTarget()->getStackAlignment();
CfgNode *EntryNode = getEntryNode();
- // Allocas in the entry block that have constant size and alignment less
- // than or equal to the function's stack alignment.
- CfgVector<Inst *> FixedAllocas;
- // Allocas in the entry block that have constant size and alignment greater
- // than the function's stack alignment.
- CfgVector<Inst *> AlignedAllocas;
// LLVM enforces power of 2 alignment.
assert(llvm::isPowerOf2_32(StackAlignment));
- // Collect the Allocas into the two vectors.
- bool RequiresFramePointer = false;
+ // Determine if there are large alignment allocations in the entry block or
+ // dynamic allocations (variable size in the entry block).
+ bool HasLargeAlignment = false;
+ bool HasDynamicAllocation = false;
for (Inst &Instr : EntryNode->getInsts()) {
if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
- if (!llvm::isa<Constant>(Alloca->getSizeInBytes())) {
- // Variable-sized allocations require a frame pointer.
- RequiresFramePointer = true;
- continue;
- }
uint32_t AlignmentParam = Alloca->getAlignInBytes();
- // For default align=0, set it to the real value 1, to avoid any
- // bit-manipulation problems below.
- AlignmentParam = std::max(AlignmentParam, 1u);
- assert(llvm::isPowerOf2_32(AlignmentParam));
- if (AlignmentParam > StackAlignment) {
- // Allocations aligned more than the stack require a frame pointer.
- RequiresFramePointer = true;
- AlignedAllocas.push_back(Alloca);
- } else
- FixedAllocas.push_back(Alloca);
+ if (AlignmentParam > StackAlignment)
+ HasLargeAlignment = true;
+ if (llvm::isa<Constant>(Alloca->getSizeInBytes()))
+ Alloca->setKnownFrameOffset();
+ else {
+ HasDynamicAllocation = true;
+ // If Allocas are not sorted, the first dynamic allocation causes
+ // later Allocas to be at unknown offsets relative to the stack/frame.
+ if (!SortAndCombine)
+ break;
+ }
}
}
- // Look for alloca instructions in other blocks
+ // Don't do the heavyweight sorting and layout for low optimization levels.
+ if (!SortAndCombine)
+ return;
+ // Any alloca outside the entry block is a dynamic allocation.
for (CfgNode *Node : Nodes) {
if (Node == EntryNode)
continue;
for (Inst &Instr : Node->getInsts()) {
if (llvm::isa<InstAlloca>(&Instr)) {
// Allocations outside the entry block require a frame pointer.
- RequiresFramePointer = true;
+ HasDynamicAllocation = true;
break;
}
}
- if (RequiresFramePointer)
+ if (HasDynamicAllocation)
break;
}
// Mark the target as requiring a frame pointer.
- if (RequiresFramePointer)
+ if (HasLargeAlignment || HasDynamicAllocation)
getTarget()->setHasFramePointer();
+ // Collect the Allocas into the two vectors.
+ // Allocas in the entry block that have constant size and alignment less
+ // than or equal to the function's stack alignment.
+ CfgVector<Inst *> FixedAllocas;
+ // Allocas in the entry block that have constant size and alignment greater
+ // than the function's stack alignment.
+ CfgVector<Inst *> AlignedAllocas;
+ // Maximum alignment used for the dynamic/aligned allocas.
+ uint32_t MaxAlignment = StackAlignment;
+ for (Inst &Instr : EntryNode->getInsts()) {
+ if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
+ if (!llvm::isa<Constant>(Alloca->getSizeInBytes()))
+ continue;
+ uint32_t AlignmentParam = Alloca->getAlignInBytes();
+ // For default align=0, set it to the real value 1, to avoid any
+ // bit-manipulation problems below.
+ AlignmentParam = std::max(AlignmentParam, 1u);
+ assert(llvm::isPowerOf2_32(AlignmentParam));
+ if (HasDynamicAllocation && AlignmentParam > StackAlignment) {
+ // If we have both dynamic allocations and large stack alignments,
+ // high-alignment allocations are pulled out with their own base.
+ AlignedAllocas.push_back(Alloca);
+ } else {
+ FixedAllocas.push_back(Alloca);
+ }
+ MaxAlignment = std::max(AlignmentParam, MaxAlignment);
+ }
+ }
// Add instructions to the head of the entry block in reverse order.
InstList &Insts = getEntryNode()->getInsts();
- // Fixed, large alignment alloca addresses do not have known offset.
- sortAllocas(AlignedAllocas, Insts, false);
- // Fixed, small alignment alloca addresses have known offset.
- sortAllocas(FixedAllocas, Insts, true);
+ if (HasDynamicAllocation && HasLargeAlignment) {
+ // We are using a frame pointer, but fixed large-alignment alloca addresses
+ // do not have a known offset from either the stack or frame pointer.
+ // They grow up from a user pointer from an alloca.
+ sortAndCombineAllocas(AlignedAllocas, MaxAlignment, Insts, BVT_UserPointer);
+ }
+ // Otherwise, fixed size allocas are always addressed relative to the stack
+ // unless there are dynamic allocas.
+ // TODO(sehr): re-enable frame pointer and decrementing addressing.
+ AllocaBaseVariableType BasePointerType =
+ (HasDynamicAllocation ? BVT_UserPointer : BVT_StackPointer);
+ sortAndCombineAllocas(FixedAllocas, MaxAlignment, Insts, BasePointerType);
}
void Cfg::doAddressOpt() {
void advancedPhiLowering();
void reorderNodes();
void shuffleNodes();
- void sortAllocas(CfgVector<Inst *> &Allocas, InstList &Insts,
- bool IsKnownFrameOffset);
- /// Merge all the fixed-size allocas in the entry block.
- void processAllocas();
+
+ enum AllocaBaseVariableType {
+ BVT_StackPointer,
+ BVT_FramePointer,
+ BVT_UserPointer
+ };
+ void sortAndCombineAllocas(CfgVector<Inst *> &Allocas,
+ uint32_t CombinedAlignment, InstList &Insts,
+ AllocaBaseVariableType BaseVariableType);
+ /// Scan allocas to determine whether we need to use a frame pointer.
+ /// If SortAndCombine == true, merge all the fixed-size allocas in the
+ /// entry block and emit stack or frame pointer-relative addressing.
+ void processAllocas(bool SortAndCombine);
void doAddressOpt();
void doArgLowering();
void doNopInsertion();
+// Emits this memory operand in AT&T syntax as Offset(Base,Index,1<<Shift).
+// For a rematerializable Base, its stack offset (plus any in-flight stack
+// adjustment) is folded into the printed displacement.
void MachineTraits<TargetX8632>::X86OperandMem::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
+ const ::Ice::TargetLowering *Target = Func->getTarget();
+ // If the base is rematerializable, we need to replace it with the correct
+ // physical register (esp or ebp), and update the Offset.
+ int32_t Disp = 0;
+ if (getBase() && getBase()->isRematerializable()) {
+ Disp += getBase()->getStackOffset();
+ if (!getIgnoreStackAdjust())
+ Disp += Target->getStackAdjustment();
+ }
+ // The index should never be rematerializable. But if we ever allow it, then
+ // we should make sure the rematerialization offset is shifted by the Shift
+ // value.
+ if (getIndex())
+ assert(!getIndex()->isRematerializable());
Ostream &Str = Func->getContext()->getStrEmit();
if (SegmentReg != DefaultSegment) {
assert(SegmentReg >= 0 && SegmentReg < SegReg_NUM);
}
// Emit as Offset(Base,Index,1<<Shift). Offset is emitted without the leading
// '$'. Omit the (Base,Index,1<<Shift) part if Base==nullptr.
- if (!Offset) {
+ // Note: getOffset() == 0 is a null-pointer check on the Offset operand.
+ if (getOffset() == 0 && Disp == 0) {
// No offset, emit nothing.
- } else if (const auto *CI = llvm::dyn_cast<ConstantInteger32>(Offset)) {
- if (Base == nullptr || CI->getValue())
+ } else if (getOffset() == 0 && Disp != 0) {
+ Str << Disp;
+ } else if (const auto *CI = llvm::dyn_cast<ConstantInteger32>(getOffset())) {
+ if (getBase() == nullptr || CI->getValue() || Disp != 0)
// Emit a non-zero offset without a leading '$'.
- Str << CI->getValue();
- } else if (const auto *CR = llvm::dyn_cast<ConstantRelocatable>(Offset)) {
+ Str << CI->getValue() + Disp;
+ } else if (const auto *CR =
+ llvm::dyn_cast<ConstantRelocatable>(getOffset())) {
+ // TODO(sehr): ConstantRelocatable still needs updating for
+ // rematerializable base/index and Disp.
+ assert(Disp == 0);
CR->emitWithoutPrefix(Func->getTarget());
} else {
llvm_unreachable("Invalid offset type for x86 mem operand");
}
- if (Base || Index) {
+ if (getBase() || getIndex()) {
Str << "(";
- if (Base)
- Base->emit(Func);
- if (Index) {
+ if (getBase())
+ getBase()->emit(Func);
+ if (getIndex()) {
Str << ",";
- Index->emit(Func);
- if (Shift)
- Str << "," << (1u << Shift);
+ getIndex()->emit(Func);
+ if (getShift())
+ Str << "," << (1u << getShift());
}
Str << ")";
}
}
bool Dumped = false;
Str << "[";
- if (Base) {
+ int32_t Disp = 0;
+ if (getBase() && getBase()->isRematerializable()) {
+ Disp += getBase()->getStackOffset();
+ if (!getIgnoreStackAdjust())
+ Disp += Func->getTarget()->getStackAdjustment();
+ }
+ if (getBase()) {
if (Func)
- Base->dump(Func);
+ getBase()->dump(Func);
else
- Base->dump(Str);
+ getBase()->dump(Str);
Dumped = true;
}
- if (Index) {
- if (Base)
+ if (getIndex()) {
+ assert(!getIndex()->isRematerializable());
+ if (getBase())
Str << "+";
- if (Shift > 0)
- Str << (1u << Shift) << "*";
+ if (getShift() > 0)
+ Str << (1u << getShift()) << "*";
if (Func)
- Index->dump(Func);
+ getIndex()->dump(Func);
else
- Index->dump(Str);
+ getIndex()->dump(Str);
Dumped = true;
}
// Pretty-print the Offset.
bool OffsetIsZero = false;
bool OffsetIsNegative = false;
- if (!Offset) {
+ if (getOffset() == 0 && Disp == 0) {
OffsetIsZero = true;
- } else if (const auto *CI = llvm::dyn_cast<ConstantInteger32>(Offset)) {
- OffsetIsZero = (CI->getValue() == 0);
- OffsetIsNegative = (static_cast<int32_t>(CI->getValue()) < 0);
+ } else if (getOffset() == 0 && Disp != 0) {
+ OffsetIsZero = (Disp == 0);
+ OffsetIsNegative = (Disp < 0);
+ } else if (const auto *CI = llvm::dyn_cast<ConstantInteger32>(getOffset())) {
+ OffsetIsZero = (CI->getValue() + Disp == 0);
+ OffsetIsNegative = (static_cast<int32_t>(CI->getValue()) + Disp < 0);
} else {
- assert(llvm::isa<ConstantRelocatable>(Offset));
+ assert(llvm::isa<ConstantRelocatable>(getOffset()) && Disp == 0);
}
if (Dumped) {
if (!OffsetIsZero) { // Suppress if Offset is known to be 0
if (!OffsetIsNegative) // Suppress if Offset is known to be negative
Str << "+";
- Offset->dump(Func, Str);
+ getOffset()->dump(Func, Str);
}
} else {
// There is only the offset.
- Offset->dump(Func, Str);
+ getOffset()->dump(Func, Str);
}
Str << "]";
}
MachineTraits<TargetX8632>::Address
MachineTraits<TargetX8632>::X86OperandMem::toAsmAddress(
- MachineTraits<TargetX8632>::Assembler *Asm) const {
+ MachineTraits<TargetX8632>::Assembler *Asm,
+ const Ice::TargetLowering *Target) const {
int32_t Disp = 0;
+ if (getBase() && getBase()->isRematerializable()) {
+ Disp += getBase()->getStackOffset();
+ if (!getIgnoreStackAdjust()) {
+ Disp += Target->getStackAdjustment();
+ }
+ }
+ // The index should never be rematerializable. But if we ever allow it, then
+ // we should make sure the rematerialization offset is shifted by the Shift
+ // value.
+ if (getIndex())
+ assert(!getIndex()->isRematerializable());
AssemblerFixup *Fixup = nullptr;
// Determine the offset (is it relocatable?)
if (getOffset()) {
if (const auto *CI = llvm::dyn_cast<ConstantInteger32>(getOffset())) {
- Disp = static_cast<int32_t>(CI->getValue());
+ Disp += static_cast<int32_t>(CI->getValue());
} else if (const auto CR =
llvm::dyn_cast<ConstantRelocatable>(getOffset())) {
- Disp = CR->getOffset();
+ Disp += CR->getOffset();
Fixup = Asm->createFixup(RelFixup, CR);
} else {
llvm_unreachable("Unexpected offset type");
MachineTraits<TargetX8664>::Address
MachineTraits<TargetX8664>::X86OperandMem::toAsmAddress(
- MachineTraits<TargetX8664>::Assembler *Asm) const {
+ MachineTraits<TargetX8664>::Assembler *Asm,
+ const Ice::TargetLowering *Target) const {
+ // TODO(sehr): handle rematerializable base/index.
+ (void)Target;
+ if (getBase())
+ assert(!getBase()->isRematerializable());
+ if (getIndex())
+ assert(!getIndex()->isRematerializable());
int32_t Disp = 0;
AssemblerFixup *Fixup = nullptr;
// Determine the offset (is it relocatable?)
Ostream &Str = Func->getContext()->getStrEmit();
assert(this->getSrcSize() == 1);
Str << "\tcall\t";
- if (const auto *CI = llvm::dyn_cast<ConstantInteger32>(getCallTarget())) {
+ Operand *CallTarget = getCallTarget();
+ TargetLowering *Target = Func->getTarget();
+ if (const auto *CI = llvm::dyn_cast<ConstantInteger32>(CallTarget)) {
// Emit without a leading '$'.
Str << CI->getValue();
- } else if (const auto CallTarget =
- llvm::dyn_cast<ConstantRelocatable>(getCallTarget())) {
- CallTarget->emitWithoutPrefix(Func->getTarget());
+ } else if (const auto DirectCallTarget =
+ llvm::dyn_cast<ConstantRelocatable>(CallTarget)) {
+ DirectCallTarget->emitWithoutPrefix(Target);
} else {
Str << "*";
- getCallTarget()->emit(Func);
+ CallTarget->emit(Func);
}
- Func->getTarget()->resetStackAdjustment();
+ Target->resetStackAdjustment();
}
+// Emits the call through the integrated assembler, dispatching on the kind of
+// the call-target operand: register or stack variable, memory operand,
+// relocatable symbol, or integer immediate.
template <class Machine>
void InstX86Call<Machine>::emitIAS(const Cfg *Func) const {
typename InstX86Base<Machine>::Traits::Assembler *Asm =
Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
- Operand *Target = getCallTarget();
- if (const auto *Var = llvm::dyn_cast<Variable>(Target)) {
+ Operand *CallTarget = getCallTarget();
+ TargetLowering *Target = Func->getTarget();
+ if (const auto *Var = llvm::dyn_cast<Variable>(CallTarget)) {
if (Var->hasReg()) {
Asm->call(InstX86Base<Machine>::Traits::getEncodedGPR(Var->getRegNum()));
} else {
Asm->call(
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->stackVarToAsmOperand(Var));
}
} else if (const auto *Mem = llvm::dyn_cast<
typename InstX86Base<Machine>::Traits::X86OperandMem>(
- Target)) {
+ CallTarget)) {
assert(Mem->getSegmentRegister() ==
InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
- Asm->call(Mem->toAsmAddress(Asm));
- } else if (const auto *CR = llvm::dyn_cast<ConstantRelocatable>(Target)) {
+ Asm->call(Mem->toAsmAddress(Asm, Target));
+ } else if (const auto *CR = llvm::dyn_cast<ConstantRelocatable>(CallTarget)) {
assert(CR->getOffset() == 0 && "We only support calling a function");
Asm->call(CR);
- } else if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Target)) {
+ } else if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(CallTarget)) {
Asm->call(Immediate(Imm->getValue()));
} else {
llvm_unreachable("Unexpected operand type");
}
- Func->getTarget()->resetStackAdjustment();
+ // NOTE(review): presumably a call consumes any pending stack adjustment;
+ // the textual emit() counterpart performs the same reset — confirm against
+ // the lowering code that sets the adjustment.
+ Target->resetStackAdjustment();
}
template <class Machine>
void emitIASOpTyGPR(const Cfg *Func, Type Ty, const Operand *Op,
const typename InstX86Base<
Machine>::Traits::Assembler::GPREmitterOneOp &Emitter) {
+ TargetLowering *Target = Func->getTarget();
typename InstX86Base<Machine>::Traits::Assembler *Asm =
Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
if (const auto *Var = llvm::dyn_cast<Variable>(Op)) {
} else {
typename InstX86Base<Machine>::Traits::Address StackAddr(
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->stackVarToAsmOperand(Var));
(Asm->*(Emitter.Addr))(Ty, StackAddr);
}
} else if (const auto *Mem = llvm::dyn_cast<
typename InstX86Base<Machine>::Traits::X86OperandMem>(Op)) {
Mem->emitSegmentOverride(Asm);
- (Asm->*(Emitter.Addr))(Ty, Mem->toAsmAddress(Asm));
+ (Asm->*(Emitter.Addr))(Ty, Mem->toAsmAddress(Asm, Target));
} else {
llvm_unreachable("Unexpected operand type");
}
const Cfg *Func, Type Ty, const Variable *Var, const Operand *Src,
const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterRegOp
&Emitter) {
+ TargetLowering *Target = Func->getTarget();
typename InstX86Base<Machine>::Traits::Assembler *Asm =
Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
assert(Var->hasReg());
} else {
typename InstX86Base<Machine>::Traits::Address SrcStackAddr =
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->stackVarToAsmOperand(SrcVar);
(Asm->*(Emitter.GPRAddr))(Ty, VarReg, SrcStackAddr);
}
} else if (const auto *Mem = llvm::dyn_cast<
typename InstX86Base<Machine>::Traits::X86OperandMem>(Src)) {
Mem->emitSegmentOverride(Asm);
- (Asm->*(Emitter.GPRAddr))(Ty, VarReg, Mem->toAsmAddress(Asm));
+ (Asm->*(Emitter.GPRAddr))(Ty, VarReg, Mem->toAsmAddress(Asm, Target));
} else if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src)) {
(Asm->*(Emitter.GPRImm))(Ty, VarReg, Immediate(Imm->getValue()));
} else if (const auto *Reloc = llvm::dyn_cast<ConstantRelocatable>(Src)) {
const Cfg *Func, Type Ty, const Operand *Op0, const Operand *Op1,
const typename InstX86Base<Machine>::Traits::Assembler::GPREmitterAddrOp
&Emitter) {
+ TargetLowering *Target = Func->getTarget();
if (const auto *Op0Var = llvm::dyn_cast<Variable>(Op0)) {
assert(!Op0Var->hasReg());
typename InstX86Base<Machine>::Traits::Address StackAddr(
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->stackVarToAsmOperand(Op0Var));
emitIASAddrOpTyGPR<Machine>(Func, Ty, StackAddr, Op1, Emitter);
} else if (const auto *Op0Mem = llvm::dyn_cast<
typename InstX86Base<Machine>::Traits::Assembler *Asm =
Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
Op0Mem->emitSegmentOverride(Asm);
- emitIASAddrOpTyGPR<Machine>(Func, Ty, Op0Mem->toAsmAddress(Asm), Op1,
- Emitter);
+ emitIASAddrOpTyGPR<Machine>(Func, Ty, Op0Mem->toAsmAddress(Asm, Target),
+ Op1, Emitter);
} else if (const auto *Split = llvm::dyn_cast<
typename InstX86Base<Machine>::Traits::VariableSplit>(Op0)) {
emitIASAddrOpTyGPR<Machine>(Func, Ty, Split->toAsmAddress(Func), Op1,
const Cfg *Func, Type Ty, const Variable *Var, const Operand *Src,
const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterShiftOp
&Emitter) {
+ TargetLowering *Target = Func->getTarget();
typename InstX86Base<Machine>::Traits::Assembler *Asm =
Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
assert(Var->hasReg());
} else {
typename InstX86Base<Machine>::Traits::Address SrcStackAddr =
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->stackVarToAsmOperand(SrcVar);
(Asm->*(Emitter.XmmAddr))(Ty, VarReg, SrcStackAddr);
}
typename InstX86Base<Machine>::Traits::X86OperandMem>(Src)) {
assert(Mem->getSegmentRegister() ==
InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
- (Asm->*(Emitter.XmmAddr))(Ty, VarReg, Mem->toAsmAddress(Asm));
+ (Asm->*(Emitter.XmmAddr))(Ty, VarReg, Mem->toAsmAddress(Asm, Target));
} else if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src)) {
(Asm->*(Emitter.XmmImm))(Ty, VarReg, Immediate(Imm->getValue()));
} else {
const Cfg *Func, Type Ty, const Variable *Var, const Operand *Src,
const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp
&Emitter) {
+ TargetLowering *Target = Func->getTarget();
typename InstX86Base<Machine>::Traits::Assembler *Asm =
Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
assert(Var->hasReg());
} else {
typename InstX86Base<Machine>::Traits::Address SrcStackAddr =
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->stackVarToAsmOperand(SrcVar);
(Asm->*(Emitter.XmmAddr))(Ty, VarReg, SrcStackAddr);
}
typename InstX86Base<Machine>::Traits::X86OperandMem>(Src)) {
assert(Mem->getSegmentRegister() ==
InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
- (Asm->*(Emitter.XmmAddr))(Ty, VarReg, Mem->toAsmAddress(Asm));
+ (Asm->*(Emitter.XmmAddr))(Ty, VarReg, Mem->toAsmAddress(Asm, Target));
} else if (const auto *Imm = llvm::dyn_cast<Constant>(Src)) {
(Asm->*(Emitter.XmmAddr))(
Ty, VarReg,
Type SrcTy, const Operand *Src,
const typename InstX86Base<Machine>::Traits::Assembler::
template CastEmitterRegOp<DReg_t, SReg_t> &Emitter) {
+ TargetLowering *Target = Func->getTarget();
typename InstX86Base<Machine>::Traits::Assembler *Asm =
Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
assert(Dest->hasReg());
} else {
typename InstX86Base<Machine>::Traits::Address SrcStackAddr =
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->stackVarToAsmOperand(SrcVar);
(Asm->*(Emitter.RegAddr))(DestTy, DestReg, SrcTy, SrcStackAddr);
}
} else if (const auto *Mem = llvm::dyn_cast<
typename InstX86Base<Machine>::Traits::X86OperandMem>(Src)) {
Mem->emitSegmentOverride(Asm);
- (Asm->*(Emitter.RegAddr))(DestTy, DestReg, SrcTy, Mem->toAsmAddress(Asm));
+ (Asm->*(Emitter.RegAddr))(DestTy, DestReg, SrcTy,
+ Mem->toAsmAddress(Asm, Target));
} else {
llvm_unreachable("Unexpected operand type");
}
const Operand *Src1,
const typename InstX86Base<Machine>::Traits::Assembler::
template ThreeOpImmEmitter<DReg_t, SReg_t> Emitter) {
+ TargetLowering *Target = Func->getTarget();
typename InstX86Base<Machine>::Traits::Assembler *Asm =
Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
// This only handles Dest being a register, and Src1 being an immediate.
} else {
typename InstX86Base<Machine>::Traits::Address SrcStackAddr =
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->stackVarToAsmOperand(SrcVar);
(Asm->*(Emitter.RegAddrImm))(DispatchTy, DestReg, SrcStackAddr, Imm);
}
} else if (const auto *Mem = llvm::dyn_cast<
typename InstX86Base<Machine>::Traits::X86OperandMem>(Src0)) {
Mem->emitSegmentOverride(Asm);
- (Asm->*(Emitter.RegAddrImm))(DispatchTy, DestReg, Mem->toAsmAddress(Asm),
- Imm);
+ (Asm->*(Emitter.RegAddrImm))(DispatchTy, DestReg,
+ Mem->toAsmAddress(Asm, Target), Imm);
} else {
llvm_unreachable("Unexpected operand type");
}
const Cfg *Func, const Variable *Dest, const Operand *Src,
const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterMovOps
Emitter) {
+ TargetLowering *Target = Func->getTarget();
typename InstX86Base<Machine>::Traits::Assembler *Asm =
Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
if (Dest->hasReg()) {
InstX86Base<Machine>::Traits::getEncodedXmm(SrcVar->getRegNum()));
} else {
typename InstX86Base<Machine>::Traits::Address StackAddr(
- static_cast<typename InstX86Base<Machine>::Traits::TargetLowering
- *>(Func->getTarget())
+ static_cast<
+ typename InstX86Base<Machine>::Traits::TargetLowering *>(Target)
->stackVarToAsmOperand(SrcVar));
(Asm->*(Emitter.XmmAddr))(DestReg, StackAddr);
}
typename InstX86Base<Machine>::Traits::X86OperandMem>(Src)) {
assert(SrcMem->getSegmentRegister() ==
InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
- (Asm->*(Emitter.XmmAddr))(DestReg, SrcMem->toAsmAddress(Asm));
+ (Asm->*(Emitter.XmmAddr))(DestReg, SrcMem->toAsmAddress(Asm, Target));
} else {
llvm_unreachable("Unexpected operand type");
}
} else {
typename InstX86Base<Machine>::Traits::Address StackAddr(
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->stackVarToAsmOperand(Dest));
// Src must be a register in this case.
const auto *SrcVar = llvm::cast<Variable>(Src);
char buf[30];
bool TypesAreValid = this->getDest()->getType() == IceType_v4i32 ||
this->getDest()->getType() == IceType_v8i16;
+ TargetLowering *Target = Func->getTarget();
bool InstructionSetIsValid =
this->getDest()->getType() == IceType_v8i16 ||
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->getInstructionSet() >= InstX86Base<Machine>::Traits::SSE4_1;
(void)TypesAreValid;
(void)InstructionSetIsValid;
void InstX86Pmull<Machine>::emitIAS(const Cfg *Func) const {
Type Ty = this->getDest()->getType();
bool TypesAreValid = Ty == IceType_v4i32 || Ty == IceType_v8i16;
+ TargetLowering *Target = Func->getTarget();
bool InstructionSetIsValid =
Ty == IceType_v8i16 ||
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->getInstructionSet() >= InstX86Base<Machine>::Traits::SSE4_1;
(void)TypesAreValid;
(void)InstructionSetIsValid;
void InstX86Blendvps<Machine>::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
+ TargetLowering *Target = Func->getTarget();
assert(static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->getInstructionSet() >= InstX86Base<Machine>::Traits::SSE4_1);
emitVariableBlendInst<Machine>(this->Opcode, this, Func);
}
template <class Machine>
void InstX86Blendvps<Machine>::emitIAS(const Cfg *Func) const {
+ TargetLowering *Target = Func->getTarget();
assert(static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->getInstructionSet() >= InstX86Base<Machine>::Traits::SSE4_1);
static const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp
Emitter = {&InstX86Base<Machine>::Traits::Assembler::blendvps,
void InstX86Pblendvb<Machine>::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
+ TargetLowering *Target = Func->getTarget();
assert(static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->getInstructionSet() >= InstX86Base<Machine>::Traits::SSE4_1);
emitVariableBlendInst<Machine>(this->Opcode, this, Func);
}
template <class Machine>
void InstX86Pblendvb<Machine>::emitIAS(const Cfg *Func) const {
+ TargetLowering *Target = Func->getTarget();
assert(static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->getInstructionSet() >= InstX86Base<Machine>::Traits::SSE4_1);
static const typename InstX86Base<Machine>::Traits::Assembler::XmmEmitterRegOp
Emitter = {&InstX86Base<Machine>::Traits::Assembler::pblendvb,
template <class Machine>
void InstX86Insertps<Machine>::emitIAS(const Cfg *Func) const {
assert(this->getSrcSize() == 3);
+ TargetLowering *Target = Func->getTarget();
assert(static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->getInstructionSet() >= InstX86Base<Machine>::Traits::SSE4_1);
const Variable *Dest = this->getDest();
assert(Dest == this->getSrc(0));
(InstX86Base<Machine>::Traits::Is64Bit));
typename InstX86Base<Machine>::Traits::Assembler *Asm =
Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+ TargetLowering *Target = Func->getTarget();
if (const auto *SrcVar = llvm::dyn_cast<Variable>(Src)) {
if (SrcVar->hasReg()) {
Asm->cmov(
SrcTy, Condition, InstX86Base<Machine>::Traits::getEncodedGPR(
this->getDest()->getRegNum()),
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->stackVarToAsmOperand(SrcVar));
}
} else if (const auto *Mem = llvm::dyn_cast<
InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
Asm->cmov(SrcTy, Condition, InstX86Base<Machine>::Traits::getEncodedGPR(
this->getDest()->getRegNum()),
- Mem->toAsmAddress(Asm));
+ Mem->toAsmAddress(Asm, Target));
} else {
llvm_unreachable("Unexpected operand type");
}
// Assuming there isn't any load folding for cmpps, and vector constants are
// not allowed in PNaCl.
assert(llvm::isa<Variable>(this->getSrc(1)));
+ TargetLowering *Target = Func->getTarget();
const auto *SrcVar = llvm::cast<Variable>(this->getSrc(1));
if (SrcVar->hasReg()) {
Asm->cmpps(InstX86Base<Machine>::Traits::getEncodedXmm(
} else {
typename InstX86Base<Machine>::Traits::Address SrcStackAddr =
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->stackVarToAsmOperand(SrcVar);
Asm->cmpps(InstX86Base<Machine>::Traits::getEncodedXmm(
this->getDest()->getRegNum()),
typename InstX86Base<Machine>::Traits::Assembler *Asm =
Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
Type Ty = this->getSrc(0)->getType();
+ TargetLowering *Target = Func->getTarget();
const auto Mem =
llvm::cast<typename InstX86Base<Machine>::Traits::X86OperandMem>(
this->getSrc(0));
assert(Mem->getSegmentRegister() ==
InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
const typename InstX86Base<Machine>::Traits::Address Addr =
- Mem->toAsmAddress(Asm);
+ Mem->toAsmAddress(Asm, Target);
const auto *VarReg = llvm::cast<Variable>(this->getSrc(2));
assert(VarReg->hasReg());
const typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister Reg =
this->getSrc(0));
assert(Mem->getSegmentRegister() ==
InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
+ TargetLowering *Target = Func->getTarget();
const typename InstX86Base<Machine>::Traits::Address Addr =
- Mem->toAsmAddress(Asm);
+ Mem->toAsmAddress(Asm, Target);
Asm->cmpxchg8b(Addr, this->Locked);
}
InstX86Base<Machine>::Traits::getEncodedXmm(SrcVar->getRegNum());
typename InstX86Base<Machine>::Traits::Assembler *Asm =
Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+ TargetLowering *Target = Func->getTarget();
if (const auto *DestVar = llvm::dyn_cast<Variable>(Dest)) {
assert(!DestVar->hasReg());
typename InstX86Base<Machine>::Traits::Address StackAddr(
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->stackVarToAsmOperand(DestVar));
Asm->movss(DestTy, StackAddr, SrcReg);
} else {
Dest);
assert(DestMem->getSegmentRegister() ==
InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
- Asm->movss(DestTy, DestMem->toAsmAddress(Asm), SrcReg);
+ Asm->movss(DestTy, DestMem->toAsmAddress(Asm, Target), SrcReg);
}
return;
} else {
assert(DestMem->getSegmentRegister() ==
InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
assert(SrcVar->hasReg());
- Asm->movups(DestMem->toAsmAddress(Asm),
+ TargetLowering *Target = Func->getTarget();
+ Asm->movups(DestMem->toAsmAddress(Asm, Target),
InstX86Base<Machine>::Traits::getEncodedXmm(SrcVar->getRegNum()));
}
assert(DestMem->getSegmentRegister() ==
InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
assert(SrcVar->hasReg());
- Asm->movq(DestMem->toAsmAddress(Asm),
+ TargetLowering *Target = Func->getTarget();
+ Asm->movq(DestMem->toAsmAddress(Asm, Target),
InstX86Base<Machine>::Traits::getEncodedXmm(SrcVar->getRegNum()));
}
// TODO: This assert disallows usages such as copying a floating
// point value between a vector and a scalar (which movss is used for). Clean
// this up.
- assert(Func->getTarget()->typeWidthInBytesOnStack(DestTy) ==
- Func->getTarget()->typeWidthInBytesOnStack(SrcTy));
+ TargetLowering *Target = Func->getTarget();
+ assert(Target->typeWidthInBytesOnStack(DestTy) ==
+ Target->typeWidthInBytesOnStack(SrcTy));
const Operand *NewSrc = Src;
if (auto *SrcVar = llvm::dyn_cast<Variable>(Src)) {
int32_t NewRegNum = Variable::NoRegister;
// TODO: This assert disallows usages such as copying a floating
// point value between a vector and a scalar (which movss is used for). Clean
// this up.
- assert(
- Func->getTarget()->typeWidthInBytesOnStack(this->getDest()->getType()) ==
- Func->getTarget()->typeWidthInBytesOnStack(Src->getType()));
+ TargetLowering *Target = Func->getTarget();
+ assert(Target->typeWidthInBytesOnStack(this->getDest()->getType()) ==
+ Target->typeWidthInBytesOnStack(Src->getType()));
if (Dest->hasReg()) {
if (isScalarFloatingType(DestTy)) {
emitIASRegOpTyXMM<Machine>(Func, DestTy, Dest, Src, XmmRegEmitter);
// decide on the emitters.
typename InstX86Base<Machine>::Traits::Address StackAddr(
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->stackVarToAsmOperand(Dest));
if (isScalarFloatingType(SrcTy)) {
// Src must be a register.
assert(this->getSrcSize() == 1);
const Variable *Dest = this->getDest();
const auto *SrcVar = llvm::cast<Variable>(this->getSrc(0));
+ TargetLowering *Target = Func->getTarget();
// For insert/extract element (one of Src/Dest is an Xmm vector and the other
// is an int type).
if (SrcVar->getType() == IceType_i32 ||
} else {
typename InstX86Base<Machine>::Traits::Address StackAddr(
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->stackVarToAsmOperand(SrcVar));
Asm->movd(SrcVar->getType(), DestReg, StackAddr);
}
} else {
typename InstX86Base<Machine>::Traits::Address StackAddr(
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->stackVarToAsmOperand(Dest));
Asm->movd(Dest->getType(), StackAddr, SrcReg);
}
Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
assert(this->getSrcSize() == 1);
const Operand *Src = this->getSrc(0);
+ TargetLowering *Target = Func->getTarget();
Type Ty = Src->getType();
if (const auto *Var = llvm::dyn_cast<Variable>(Src)) {
if (Var->hasReg()) {
} else {
typename InstX86Base<Machine>::Traits::Address StackAddr(
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->stackVarToAsmOperand(Var));
Asm->fld(Ty, StackAddr);
}
typename InstX86Base<Machine>::Traits::X86OperandMem>(Src)) {
assert(Mem->getSegmentRegister() ==
InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
- Asm->fld(Ty, Mem->toAsmAddress(Asm));
+ Asm->fld(Ty, Mem->toAsmAddress(Asm, Target));
} else if (const auto *Imm = llvm::dyn_cast<Constant>(Src)) {
Asm->fld(Ty, InstX86Base<Machine>::Traits::Address::ofConstPool(Asm, Imm));
} else {
Asm->fstp(InstX86Base<Machine>::Traits::RegisterSet::getEncodedSTReg(0));
return;
}
+ TargetLowering *Target = Func->getTarget();
Type Ty = Dest->getType();
if (!Dest->hasReg()) {
typename InstX86Base<Machine>::Traits::Address StackAddr(
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->stackVarToAsmOperand(Dest));
Asm->fstp(Ty, StackAddr);
} else {
Ostream &Str = Func->getContext()->getStrEmit();
assert(this->getSrcSize() == 2);
// pextrb and pextrd are SSE4.1 instructions.
+ TargetLowering *Target = Func->getTarget();
assert(this->getSrc(0)->getType() == IceType_v8i16 ||
this->getSrc(0)->getType() == IceType_v8i1 ||
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->getInstructionSet() >= InstX86Base<Machine>::Traits::SSE4_1);
Str << "\t" << this->Opcode
<< InstX86Base<Machine>::Traits::TypeAttributes[this->getSrc(0)
const Variable *Dest = this->getDest();
Type DispatchTy = InstX86Base<Machine>::Traits::getInVectorElementType(
this->getSrc(0)->getType());
+ TargetLowering *Target = Func->getTarget();
assert(DispatchTy == IceType_i16 ||
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->getInstructionSet() >= InstX86Base<Machine>::Traits::SSE4_1);
// pextrw must take a register dest. There is an SSE4.1 version that takes a
// memory dest, but we aren't using it. For uniformity, just restrict them
Ostream &Str = Func->getContext()->getStrEmit();
assert(this->getSrcSize() == 3);
// pinsrb and pinsrd are SSE4.1 instructions.
+ TargetLowering *Target = Func->getTarget();
assert(this->getDest()->getType() == IceType_v8i16 ||
this->getDest()->getType() == IceType_v8i1 ||
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->getInstructionSet() >= InstX86Base<Machine>::Traits::SSE4_1);
Str << "\t" << this->Opcode
<< InstX86Base<
// pinsrb and pinsrd are SSE4.1 instructions.
const Operand *Src0 = this->getSrc(1);
Type DispatchTy = Src0->getType();
+ TargetLowering *Target = Func->getTarget();
assert(DispatchTy == IceType_i16 ||
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->getInstructionSet() >= InstX86Base<Machine>::Traits::SSE4_1);
// If src1 is a register, it should always be r32 (this should fall out from
// the encodings for ByteRegs overlapping the encodings for r32), but we have
Asm->popl(InstX86Base<Machine>::Traits::getEncodedGPR(
this->getDest()->getRegNum()));
} else {
+ TargetLowering *Target = Func->getTarget();
Asm->popl(
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->stackVarToAsmOperand(this->getDest()));
}
}
return;
Ostream &Str = Func->getContext()->getStrEmit();
Str << "\tsubl\t$" << Amount << ", %esp";
- Func->getTarget()->updateStackAdjustment(Amount);
+ TargetLowering *Target = Func->getTarget();
+ Target->updateStackAdjustment(Amount);
}
template <class Machine>
Asm->sub(IceType_i32,
InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp,
Immediate(Amount));
- Func->getTarget()->updateStackAdjustment(Amount);
+ TargetLowering *Target = Func->getTarget();
+ Target->updateStackAdjustment(Amount);
}
template <class Machine>
assert(this->getSrcSize() == 0);
typename InstX86Base<Machine>::Traits::Assembler *Asm =
Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
+ TargetLowering *Target = Func->getTarget();
if (this->getDest()->hasReg())
Asm->setcc(Condition, InstX86Base<Machine>::Traits::getEncodedByteReg(
this->getDest()->getRegNum()));
Asm->setcc(
Condition,
static_cast<typename InstX86Base<Machine>::Traits::TargetLowering *>(
- Func->getTarget())
+ Target)
->stackVarToAsmOperand(this->getDest()));
return;
}
this->getSrc(0));
assert(Mem->getSegmentRegister() ==
InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
+ TargetLowering *Target = Func->getTarget();
const typename InstX86Base<Machine>::Traits::Address Addr =
- Mem->toAsmAddress(Asm);
+ Mem->toAsmAddress(Asm, Target);
const auto *VarReg = llvm::cast<Variable>(this->getSrc(1));
assert(VarReg->hasReg());
const typename InstX86Base<Machine>::Traits::RegisterSet::GPRRegister Reg =
this->getSrc(0));
assert(Mem->getSegmentRegister() ==
InstX86Base<Machine>::Traits::X86OperandMem::DefaultSegment);
+ TargetLowering *Target = Func->getTarget();
const typename InstX86Base<Machine>::Traits::Address Addr =
- Mem->toAsmAddress(Asm);
+ Mem->toAsmAddress(Asm, Target);
Asm->xchg(Ty, Addr, Reg1);
}
bool mustNotHaveReg() const {
return RegRequirement == RR_MustNotHaveRegister;
}
+ void setRematerializable(int32_t NewRegNum, int32_t NewOffset) {
+ IsRematerializable = true;
+ setRegNum(NewRegNum);
+ setStackOffset(NewOffset);
+ setMustHaveReg();
+ }
+ bool isRematerializable() const { return IsRematerializable; }
void setRegClass(uint8_t RC) { RegisterClass = static_cast<RegClass>(RC); }
RegClass getRegClass() const { return RegisterClass; }
/// and validating live ranges. This is usually reserved for the stack
/// pointer and other physical registers specifically referenced by name.
bool IgnoreLiveness = false;
+ /// If IsRematerializable, RegNum keeps track of which register (stack or
+ /// frame pointer), and StackOffset is the known offset from that register.
+ bool IsRematerializable = false;
RegRequirement RegRequirement = RR_MayHaveRegister;
RegClass RegisterClass;
/// RegNum is the allocated register, or NoRegister if it isn't
// TODO(stichnot): share passes with X86?
// https://code.google.com/p/nativeclient/issues/detail?id=4094
+ // Do not merge Alloca instructions, and lay out the stack.
+ static constexpr bool SortAndCombineAllocas = false;
+ Func->processAllocas(SortAndCombineAllocas);
+ Func->dump("After Alloca processing");
+
if (!Ctx->getFlags().getPhiEdgeSplit()) {
// Lower Phi instructions.
Func->placePhiLoads();
// TODO: share passes with X86?
+ // Do not merge Alloca instructions, and lay out the stack.
+ static constexpr bool SortAndCombineAllocas = false;
+ Func->processAllocas(SortAndCombineAllocas);
+ Func->dump("After Alloca processing");
+
Func->placePhiLoads();
if (Func->hasError())
return;
// TODO(stichnot): share passes with X86?
// https://code.google.com/p/nativeclient/issues/detail?id=4094
+ // Merge Alloca instructions, and lay out the stack.
+ static constexpr bool SortAndCombineAllocas = true;
+ Func->processAllocas(SortAndCombineAllocas);
+ Func->dump("After Alloca processing");
+
if (!Ctx->getFlags().getPhiEdgeSplit()) {
// Lower Phi instructions.
Func->placePhiLoads();
// TODO: share passes with X86?
+ // Do not merge Alloca instructions, and lay out the stack.
+ static constexpr bool SortAndCombineAllocas = false;
+ Func->processAllocas(SortAndCombineAllocas);
+ Func->dump("After Alloca processing");
+
Func->placePhiLoads();
if (Func->hasError())
return;
Variable *esp =
Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
- StackArgLocations.push_back(
- Traits::X86OperandMem::create(Func, Ty, esp, Loc));
+ auto *Mem = Traits::X86OperandMem::create(Func, Ty, esp, Loc);
+ // Stack stores for arguments are fixed to esp.
+ Mem->setIgnoreStackAdjust(true);
+ StackArgLocations.push_back(Mem);
ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
}
}
uint16_t getShift() const { return Shift; }
SegmentRegisters getSegmentRegister() const { return SegmentReg; }
void emitSegmentOverride(Assembler *Asm) const;
- Address toAsmAddress(Assembler *Asm) const;
+ Address toAsmAddress(Assembler *Asm,
+ const Ice::TargetLowering *Target) const;
void emit(const Cfg *Func) const override;
using X86Operand::dump;
bool getRandomized() const { return Randomized; }
+ void setIgnoreStackAdjust(bool Ignore) { IgnoreStackAdjust = Ignore; }
+ bool getIgnoreStackAdjust() const { return IgnoreStackAdjust; }
+
private:
X86OperandMem(Cfg *Func, Type Ty, Variable *Base, Constant *Offset,
Variable *Index, uint16_t Shift, SegmentRegisters SegmentReg);
/// memory operands are generated in
/// TargetX86Base::randomizeOrPoolImmediate()
bool Randomized;
+ /// Memory operations involving the stack pointer need to know when the
+ /// stack pointer was moved temporarily. Ignore that adjustment in
+ /// cases that should be pinned to the stack pointer, such as outgoing
+ /// arguments to calls.
+ bool IgnoreStackAdjust = false;
};
/// VariableSplit is a way to treat an f64 memory location as a pair of i32
uint16_t getShift() const { return Shift; }
SegmentRegisters getSegmentRegister() const { return DefaultSegment; }
void emitSegmentOverride(Assembler *) const {}
- Address toAsmAddress(Assembler *Asm) const;
+ Address toAsmAddress(Assembler *Asm,
+ const Ice::TargetLowering *Target) const;
void emit(const Cfg *Func) const override;
using X86Operand::dump;
bool getRandomized() const { return Randomized; }
+ void setIgnoreStackAdjust(bool Ignore) { IgnoreStackAdjust = Ignore; }
+ bool getIgnoreStackAdjust() const { return IgnoreStackAdjust; }
+
private:
X86OperandMem(Cfg *Func, Type Ty, Variable *Base, Constant *Offset,
Variable *Index, uint16_t Shift);
/// memory operands are generated in
/// TargetX86Base::randomizeOrPoolImmediate()
bool Randomized = false;
+ /// Memory operations involving the stack pointer need to know when the
+ /// stack pointer was moved temporarily. Ignore that adjustment in
+ /// cases that should be pinned to the stack pointer, such as outgoing
+ /// arguments to calls.
+ bool IgnoreStackAdjust = false;
};
/// VariableSplit is a way to treat an f64 memory location as a pair of i32
Legal_Reg = 1 << 0, // physical register, not stack location
Legal_Imm = 1 << 1,
Legal_Mem = 1 << 2, // includes [eax+4*ecx] as well as [esp+12]
- Legal_All = ~Legal_None
+ Legal_Rematerializable = 1 << 3,
+ Legal_All = ~Legal_Rematerializable
};
using LegalMask = uint32_t;
Operand *legalize(Operand *From, LegalMask Allowed = Legal_All,
template <class Machine> void TargetX86Base<Machine>::translateO2() {
TimerMarker T(TimerStack::TT_O2, Func);
+ // Merge Alloca instructions, and lay out the stack.
+ static constexpr bool SortAndCombineAllocas = true;
+ Func->processAllocas(SortAndCombineAllocas);
+ Func->dump("After Alloca processing");
+
if (!Ctx->getFlags().getPhiEdgeSplit()) {
// Lower Phi instructions.
Func->placePhiLoads();
template <class Machine> void TargetX86Base<Machine>::translateOm1() {
TimerMarker T(TimerStack::TT_Om1, Func);
+ // Do not merge Alloca instructions, and lay out the stack.
+ static constexpr bool SortAndCombineAllocas = false;
+ Func->processAllocas(SortAndCombineAllocas);
+ Func->dump("After Alloca processing");
+
Func->placePhiLoads();
if (Func->hasError())
return;
template <class Machine>
void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {
if (!Inst->getKnownFrameOffset())
- IsEbpBasedFrame = true;
+ setHasFramePointer();
// Conservatively require the stack to be aligned. Some stack adjustment
// operations implemented below assume that the stack is aligned before the
// alloca. All the alloca code ensures that the stack alignment is preserved
uint32_t Alignment =
std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);
if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) {
+ setHasFramePointer();
_and(esp, Ctx->getConstantInt32(-Alignment));
}
if (const auto *ConstantTotalSize =
Variable *RegBase = nullptr;
Variable *RegIndex = nullptr;
if (Base) {
- RegBase = legalizeToReg(Base);
+ RegBase = llvm::cast<Variable>(
+ legalize(Base, Legal_Reg | Legal_Rematerializable));
}
if (Index) {
- RegIndex = legalizeToReg(Index);
+ RegIndex = llvm::cast<Variable>(
+ legalize(Index, Legal_Reg | Legal_Rematerializable));
}
if (Base != RegBase || Index != RegIndex) {
Mem = Traits::X86OperandMem::create(Func, Ty, RegBase, Mem->getOffset(),
// either when the variable is pre-colored or when it is assigned infinite
// weight.
bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
+ bool MustRematerialize =
+ (Var->isRematerializable() && !(Allowed & Legal_Rematerializable));
// We need a new physical register for the operand if:
- // Mem is not allowed and Var isn't guaranteed a physical
- // register, or
- // RegNum is required and Var->getRegNum() doesn't match.
- if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
- (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
+ // - Mem is not allowed and Var isn't guaranteed a physical register, or
+ // - RegNum is required and Var->getRegNum() doesn't match, or
+ // - Var is a rematerializable variable and rematerializable pass-through is
+ // not allowed (in which case we need an lea instruction).
+ if (MustRematerialize) {
+ assert(Ty == IceType_i32);
+ Variable *NewVar = makeReg(Ty, RegNum);
+ // Since Var is rematerializable, the offset will be added when the lea is
+ // emitted.
+ constexpr Constant *NoOffset = nullptr;
+ auto *Mem = Traits::X86OperandMem::create(Func, Ty, Var, NoOffset);
+ _lea(NewVar, Mem);
+ From = NewVar;
+ } else if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
+ (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
From = copyToReg(From, RegNum);
}
return From;
--- /dev/null
+; This is a basic test of the alloca instruction and a call.
+
+; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
+; RUN: --target x8632 -i %s --args -O2 -allow-externally-defined-symbols \
+; RUN: | %if --need=target_X8632 --command FileCheck %s
+
+declare void @copy(i32 %arg1, i8* %arr1, i8* %arr2, i8* %arr3, i8* %arr4);
+
+; Test that alloca base addresses get passed correctly to functions.
+define internal void @caller1(i32 %arg) {
+entry:
+ %a1 = alloca i8, i32 32, align 4
+ %p1 = bitcast i8* %a1 to i32*
+ store i32 %arg, i32* %p1, align 1
+ call void @copy(i32 %arg, i8* %a1, i8* %a1, i8* %a1, i8* %a1)
+ ret void
+}
+
+; CHECK-LABEL: caller1
+; CHECK-NEXT: sub esp,0xc
+; CHECK-NEXT: mov eax,DWORD PTR [esp+0x10]
+; CHECK-NEXT: sub esp,0x20
+; CHECK-NEXT: mov ecx,esp
+; CHECK-NEXT: mov DWORD PTR [esp],eax
+; CHECK-NEXT: sub esp,0x20
+; CHECK-NEXT: mov DWORD PTR [esp],eax
+; CHECK-NEXT: lea eax,[esp+0x20]
+; CHECK-NEXT: mov DWORD PTR [esp+0x4],eax
+; CHECK-NEXT: lea eax,[esp+0x20]
+; CHECK-NEXT: mov DWORD PTR [esp+0x8],eax
+; CHECK-NEXT: lea eax,[esp+0x20]
+; CHECK-NEXT: mov DWORD PTR [esp+0xc],eax
+; CHECK-NEXT: lea eax,[esp+0x20]
+; CHECK-NEXT: mov DWORD PTR [esp+0x10],eax
+; CHECK-NEXT: call
+; CHECK-NEXT: add esp,0x20
+; CHECK-NEXT: add esp,0x2c
+; CHECK-NEXT: ret
+
+; Test that alloca base addresses get passed correctly to functions.
+define internal void @caller2(i32 %arg) {
+entry:
+ %a1 = alloca i8, i32 32, align 4
+ %a2 = alloca i8, i32 32, align 4
+ %p1 = bitcast i8* %a1 to i32*
+ %p2 = bitcast i8* %a2 to i32*
+ store i32 %arg, i32* %p1, align 1
+ store i32 %arg, i32* %p2, align 1
+ call void @copy(i32 %arg, i8* %a1, i8* %a2, i8* %a1, i8* %a2)
+ ret void
+}
+
+; CHECK-LABEL: caller2
+; CHECK-NEXT: sub esp,0xc
+; CHECK-NEXT: mov eax,DWORD PTR [esp+0x10]
+; CHECK-NEXT: sub esp,0x40
+; CHECK-NEXT: mov ecx,esp
+; CHECK-NEXT: mov DWORD PTR [esp],eax
+; CHECK-NEXT: mov DWORD PTR [esp+0x20],eax
+; CHECK-NEXT: sub esp,0x20
+; CHECK-NEXT: mov DWORD PTR [esp],eax
+; CHECK-NEXT: lea eax,[esp+0x20]
+; CHECK-NEXT: mov DWORD PTR [esp+0x4],eax
+; CHECK-NEXT: lea eax,[esp+0x40]
+; CHECK-NEXT: mov DWORD PTR [esp+0x8],eax
+; CHECK-NEXT: lea eax,[esp+0x20]
+; CHECK-NEXT: mov DWORD PTR [esp+0xc],eax
+; CHECK-NEXT: lea eax,[esp+0x40]
+; CHECK-NEXT: mov DWORD PTR [esp+0x10],eax
+; CHECK-NEXT: call
+; CHECK-NEXT: add esp,0x20
+; CHECK-NEXT: add esp,0x4c
+; CHECK-NEXT: ret
--- /dev/null
+; This is a basic test of the alloca instruction.
+
+; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
+; RUN: --target x8632 -i %s --args -O2 -allow-externally-defined-symbols \
+; RUN: | %if --need=target_X8632 --command FileCheck %s
+
+; Test that a sequence of allocas with less than stack alignment get fused.
+define internal void @fused_small_align(i32 %arg) {
+entry:
+ %a1 = alloca i8, i32 8, align 4
+ %a2 = alloca i8, i32 12, align 4
+ %a3 = alloca i8, i32 16, align 8
+ %p1 = bitcast i8* %a1 to i32*
+ %p2 = bitcast i8* %a2 to i32*
+ %p3 = bitcast i8* %a3 to i32*
+ store i32 %arg, i32* %p1, align 1
+ store i32 %arg, i32* %p2, align 1
+ store i32 %arg, i32* %p3, align 1
+ ret void
+}
+; CHECK-LABEL: fused_small_align
+; CHECK-NEXT: sub esp,0xc
+; CHECK-NEXT: mov eax,DWORD PTR [esp+0x10]
+; CHECK-NEXT: sub esp,0x30
+; CHECK-NEXT: mov {{.*}},esp
+; CHECK-NEXT: mov DWORD PTR [esp+0x10],eax
+; CHECK-NEXT: mov DWORD PTR [esp+0x18],eax
+; CHECK-NEXT: mov DWORD PTR [esp],eax
+; CHECK-NEXT: add esp,0x3c
+
+; Test that a sequence of allocas with greater than stack alignment get fused.
+define internal void @fused_large_align(i32 %arg) {
+entry:
+ %a1 = alloca i8, i32 8, align 32
+ %a2 = alloca i8, i32 12, align 64
+ %a3 = alloca i8, i32 16, align 32
+ %p1 = bitcast i8* %a1 to i32*
+ %p2 = bitcast i8* %a2 to i32*
+ %p3 = bitcast i8* %a3 to i32*
+ store i32 %arg, i32* %p1, align 1
+ store i32 %arg, i32* %p2, align 1
+ store i32 %arg, i32* %p3, align 1
+ ret void
+}
+; CHECK-LABEL: fused_large_align
+; CHECK-NEXT: push ebp
+; CHECK-NEXT: mov ebp,esp
+; CHECK-NEXT: sub esp,0x8
+; CHECK-NEXT: mov eax,DWORD PTR [ebp+0x8]
+; CHECK-NEXT: and esp,0xffffffc0
+; CHECK-NEXT: sub esp,0x80
+; CHECK-NEXT: mov ecx,esp
+; CHECK-NEXT: mov DWORD PTR [esp+0x40],eax
+; CHECK-NEXT: mov DWORD PTR [esp],eax
+; CHECK-NEXT: mov DWORD PTR [esp+0x60],eax
+; CHECK-NEXT: mov esp,ebp
+; CHECK-NEXT: pop ebp
}
; CHECK-LABEL: test_fused_load_sub_a
; alloca store
-; CHECK: mov {{.*}},esp
; CHECK: mov DWORD PTR {{.*}},0x3e7
; atomic store (w/ its own mfence)
; The load + sub are optimized into one everywhere.
}
; CHECK-LABEL: test_fused_load_sub_b
; alloca store
-; CHECK: mov {{.*}},esp
; CHECK: mov DWORD PTR {{.*}},0x3e7
; atomic store (w/ its own mfence)
; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a
}
; CHECK-LABEL: test_fused_load_sub_c
; alloca store
-; CHECK: mov {{.*}},esp
; CHECK: mov DWORD PTR {{.*}},0x3e7
; atomic store (w/ its own mfence)
; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a