# Not specifying -Wl,--gc-sections but instead doing bitcode linking GC w/ LTO.
SB_LDFLAGS := $(LINKOPTLEVEL) $(LD_EXTRA)
+# List the target-specific source files first, which generally take longer to
+# compile, in the hope of improving parallel build time.
SRCS = \
- IceAssembler.cpp \
IceAssemblerARM32.cpp \
+ IceInstARM32.cpp \
+ IceInstMIPS32.cpp \
+ IceInstX8632.cpp \
+ IceInstX8664.cpp \
+ IceTargetLowering.cpp \
+ IceTargetLoweringARM32.cpp \
+ IceTargetLoweringMIPS32.cpp \
+ IceTargetLoweringX8632.cpp \
+ IceTargetLoweringX8664.cpp \
+ IceAssembler.cpp \
IceBrowserCompileServer.cpp \
IceCfg.cpp \
IceCfgNode.cpp \
IceGlobalContext.cpp \
IceGlobalInits.cpp \
IceInst.cpp \
- IceInstARM32.cpp \
- IceInstMIPS32.cpp \
- IceInstX8632.cpp \
- IceInstX8664.cpp \
IceIntrinsics.cpp \
IceLiveness.cpp \
IceLoopAnalyzer.cpp \
IceRegAlloc.cpp \
IceRNG.cpp \
IceSwitchLowering.cpp \
- IceTargetLowering.cpp \
- IceTargetLoweringARM32.cpp \
- IceTargetLoweringMIPS32.cpp \
- IceTargetLoweringX8632.cpp \
- IceTargetLoweringX8664.cpp \
IceThreading.cpp \
IceTimerTree.cpp \
IceTranslator.cpp \
check-unit: $(OBJDIR)/run_unittests
$(OBJDIR)/run_unittests
-ALLSPEC := 177.mesa 179.art 183.equake 188.ammp 164.gzip 175.vpr 176.gcc \
- 181.mcf 186.crafty 197.parser 253.perlbmk 254.gap 255.vortex \
- 256.bzip2 300.twolf 252.eon
+# List the spec2k components in roughly reverse order of runtime, to help with
+# parallel execution speed.
+ALLSPEC := 253.perlbmk 177.mesa 188.ammp 256.bzip2 164.gzip 179.art 183.equake \
+ 175.vpr 176.gcc 181.mcf 186.crafty 197.parser 254.gap 255.vortex \
+ 300.twolf 252.eon
.PHONY: $(ALLSPEC)
TARGET := x8632
SPEC := -O2 --filetype=asm
endif
-%.spec2k: %
+%.spec2k: % $(OBJDIR)/pnacl-sz make_symlink runtime
./pydir/szbuild_spec2k.py -v --force --target=$(TARGETFLAG) $(SPEC) $<
( cd ../../../tests/spec2k; \
./run_all.sh RunTimedBenchmarks $(SETUP) train $< )
-check-spec: $(OBJDIR)/pnacl-sz make_symlink $(ALLSPEC:=.spec2k)
+check-spec: $(ALLSPEC:=.spec2k)
check: check-lit check-unit check-xtest
return;
// Sort by decreasing alignment. This does not really matter at the moment,
// but will allow compacting stack allocation when we fuse to one alloca.
- std::sort(Allocas.begin(), Allocas.end(),
- [](Inst *I1, Inst *I2) {
- auto *A1 = llvm::dyn_cast<InstAlloca>(I1);
- auto *A2 = llvm::dyn_cast<InstAlloca>(I2);
- return A1->getAlignInBytes() > A2->getAlignInBytes();
- });
- for (Inst *Instr: Allocas) {
+ std::sort(Allocas.begin(), Allocas.end(), [](Inst *I1, Inst *I2) {
+ auto *A1 = llvm::dyn_cast<InstAlloca>(I1);
+ auto *A2 = llvm::dyn_cast<InstAlloca>(I2);
+ return A1->getAlignInBytes() > A2->getAlignInBytes();
+ });
+ for (Inst *Instr : Allocas) {
auto *Alloca = llvm::cast<InstAlloca>(Instr);
// Move the alloca to its sorted position.
- InstAlloca *NewAlloca = InstAlloca::create(this,
- Alloca->getSizeInBytes(),
- Alloca->getAlignInBytes(),
- Alloca->getDest());
+ InstAlloca *NewAlloca =
+ InstAlloca::create(this, Alloca->getSizeInBytes(),
+ Alloca->getAlignInBytes(), Alloca->getDest());
if (IsKnownFrameOffset)
NewAlloca->setKnownFrameOffset();
Insts.push_front(NewAlloca);
// Allocations aligned more than the stack require a frame pointer.
RequiresFramePointer = true;
AlignedAllocas.push_back(Alloca);
- }
- else
+ } else
FixedAllocas.push_back(Alloca);
}
}
PhiDesc() = delete;
PhiDesc(const PhiDesc &) = delete;
PhiDesc &operator=(const PhiDesc &) = delete;
+
public:
PhiDesc(InstPhi *Phi, Variable *Dest) : Phi(Phi), Dest(Dest) {}
PhiDesc(PhiDesc &&) = default;
if (Item2.Processed)
continue;
// There shouldn't be two different Phis with the same Dest variable or
- // register.
+ // register.
assert((&Item == &Item2) || !sameVarOrReg(Target, Dest, Item2.Dest));
if (sameVarOrReg(Target, Dest, Item2.Src))
++Item.NumPred;
X(Reg_bl, 3, "bl", Reg_ebx, 0,1,0,0, 1,0,0,0,1, 0, 0,0,0,1,1, \
REGLIST2(RegX8632, ebx, bx)) \
/* High 8-bit registers */ \
- X(Reg_ah, 4, "ah", Reg_eax, 1,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1, \
+ X(Reg_ah, 4, "ah", Reg_eax, 1,0,0,0, 1,0,0,0,1, 0, 0,0,0,0,1, \
REGLIST2(RegX8632, eax, ax)) \
- X(Reg_ch, 5, "ch", Reg_ecx, 1,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1, \
+ X(Reg_ch, 5, "ch", Reg_ecx, 1,0,0,0, 1,0,0,0,1, 0, 0,0,0,0,1, \
REGLIST2(RegX8632, ecx, cx)) \
- X(Reg_dh, 6, "dh", Reg_edx, 1,0,0,0, 1,0,0,0,0, 0, 0,0,0,0,1, \
+ X(Reg_dh, 6, "dh", Reg_edx, 1,0,0,0, 1,0,0,0,1, 0, 0,0,0,0,1, \
REGLIST2(RegX8632, edx, dx)) \
- X(Reg_bh, 7, "bh", Reg_ebx, 0,1,0,0, 1,0,0,0,0, 0, 0,0,0,0,1, \
+ X(Reg_bh, 7, "bh", Reg_ebx, 0,1,0,0, 1,0,0,0,1, 0, 0,0,0,0,1, \
REGLIST2(RegX8632, ebx, bx)) \
/* End of 8-bit register set */
//#define X(val, encode, name, base, scratch, preserved, stackptr, frameptr,
//#define X(val, emit)
#define ICETYPEX8632_TABLE \
- /* tag, element type, cvt , sdss, pack, width, fld */ \
- X(IceType_void, IceType_void, "?", "", "", "", "") \
- X(IceType_i1, IceType_void, "si", "", "", "b", "") \
- X(IceType_i8, IceType_void, "si", "", "", "b", "") \
- X(IceType_i16, IceType_void, "si", "", "", "w", "") \
- X(IceType_i32, IceType_void, "si", "", "", "l", "") \
- X(IceType_i64, IceType_void, "si", "", "", "q", "") \
- X(IceType_f32, IceType_void, "ss", "ss", "d", "", "s") \
- X(IceType_f64, IceType_void, "sd", "sd", "q", "", "l") \
- X(IceType_v4i1, IceType_i32, "?", "", "d", "", "") \
- X(IceType_v8i1, IceType_i16, "?", "", "w", "", "") \
- X(IceType_v16i1, IceType_i8, "?", "", "b", "", "") \
- X(IceType_v16i8, IceType_i8, "?", "", "b", "", "") \
- X(IceType_v8i16, IceType_i16, "?", "", "w", "", "") \
- X(IceType_v4i32, IceType_i32, "dq", "", "d", "", "") \
- X(IceType_v4f32, IceType_f32, "ps", "", "d", "", "")
+ /* tag, element type, cvt , sdss, pack, width, fld */ \
+ X(void, void, "?", "", "", "", "") \
+ X(i1, void, "si", "", "", "b", "") \
+ X(i8, void, "si", "", "", "b", "") \
+ X(i16, void, "si", "", "", "w", "") \
+ X(i32, void, "si", "", "", "l", "") \
+ X(i64, void, "si", "", "", "q", "") \
+ X(f32, void, "ss", "ss", "d", "", "s") \
+ X(f64, void, "sd", "sd", "q", "", "l") \
+ X(v4i1, i32, "?", "", "d", "", "") \
+ X(v8i1, i16, "?", "", "w", "", "") \
+ X(v16i1, i8, "?", "", "b", "", "") \
+ X(v16i8, i8, "?", "", "b", "", "") \
+ X(v8i16, i16, "?", "", "w", "", "") \
+ X(v4i32, i32, "dq", "", "d", "", "") \
+ X(v4f32, f32, "ps", "", "d", "", "")
//#define X(tag, elementty, cvt, sdss, pack, width, fld)
#endif // SUBZERO_SRC_ICEINSTX8632_DEF
//#define X(val, emit)
#define ICETYPEX8664_TABLE \
- /* tag , element type, cvt , sdss, pack, width, fld */ \
- X(IceType_void, IceType_void, "?", "", "", "", "") \
- X(IceType_i1, IceType_void, "si", "", "", "b", "") \
- X(IceType_i8, IceType_void, "si", "", "", "b", "") \
- X(IceType_i16, IceType_void, "si", "", "", "w", "") \
- X(IceType_i32, IceType_void, "si", "", "", "l", "") \
- X(IceType_i64, IceType_void, "si", "", "", "q", "") \
- X(IceType_f32, IceType_void, "ss", "ss", "d", "", "s") \
- X(IceType_f64, IceType_void, "sd", "sd", "q", "", "l") \
- X(IceType_v4i1, IceType_i32, "?", "", "d", "", "") \
- X(IceType_v8i1, IceType_i16, "?", "", "w", "", "") \
- X(IceType_v16i1, IceType_i8, "?", "", "b", "", "") \
- X(IceType_v16i8, IceType_i8, "?", "", "b", "", "") \
- X(IceType_v8i16, IceType_i16, "?", "", "w", "", "") \
- X(IceType_v4i32, IceType_i32, "dq", "", "d", "", "") \
- X(IceType_v4f32, IceType_f32, "ps", "", "d", "", "")
+ /* tag, element type, cvt , sdss, pack, width, fld */ \
+ X(void, void, "?", "", "", "", "") \
+ X(i1, void, "si", "", "", "b", "") \
+ X(i8, void, "si", "", "", "b", "") \
+ X(i16, void, "si", "", "", "w", "") \
+ X(i32, void, "si", "", "", "l", "") \
+ X(i64, void, "si", "", "", "q", "") \
+ X(f32, void, "ss", "ss", "d", "", "s") \
+ X(f64, void, "sd", "sd", "q", "", "l") \
+ X(v4i1, i32, "?", "", "d", "", "") \
+ X(v8i1, i16, "?", "", "w", "", "") \
+ X(v16i1, i8, "?", "", "b", "", "") \
+ X(v16i8, i8, "?", "", "b", "", "") \
+ X(v8i16, i16, "?", "", "w", "", "") \
+ X(v4i32, i32, "dq", "", "d", "", "") \
+ X(v4f32, f32, "ps", "", "d", "", "")
//#define X(tag, elementty, cvt, sdss, pack, width, fld)
#endif // SUBZERO_SRC_ICEINSTX8664_DEF
Ostream &Str = Func->getContext()->getStrEmit();
assert(this->getSrcSize() == 1);
Operand *Src0 = this->getSrc(0);
- assert(llvm::isa<Variable>(Src0));
+ int32_t DestReg = this->getDest()->getRegNum();
+ int32_t SrcReg = llvm::cast<Variable>(Src0)->getRegNum();
+ (void)DestReg;
+ (void)SrcReg;
switch (Src0->getType()) {
default:
llvm_unreachable("unexpected source type!");
break;
case IceType_i8:
- assert(llvm::cast<Variable>(Src0)->getRegNum() ==
- InstX86Base<Machine>::Traits::RegisterSet::Reg_al);
- assert(this->getDest()->getRegNum() ==
- InstX86Base<Machine>::Traits::RegisterSet::Reg_ax);
+ assert(SrcReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_al);
+ assert(DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_ax ||
+ DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_ah);
Str << "\t"
<< "cbtw";
break;
case IceType_i16:
- assert(llvm::cast<Variable>(Src0)->getRegNum() ==
- InstX86Base<Machine>::Traits::RegisterSet::Reg_ax);
- assert(this->getDest()->getRegNum() ==
- InstX86Base<Machine>::Traits::RegisterSet::Reg_dx);
+ assert(SrcReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_ax);
+ assert(DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_dx);
Str << "\t"
<< "cwtd";
break;
case IceType_i32:
- assert(llvm::cast<Variable>(Src0)->getRegNum() ==
- InstX86Base<Machine>::Traits::RegisterSet::Reg_eax);
- assert(this->getDest()->getRegNum() ==
- InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
+ assert(SrcReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_eax);
+ assert(DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
Str << "\t"
<< "cltd";
break;
case IceType_i64:
- assert(this->getDest()->getRegNum() ==
- InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
+ assert(DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
Str << "\t"
<< "cdto";
break;
Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
assert(this->getSrcSize() == 1);
Operand *Src0 = this->getSrc(0);
- assert(llvm::isa<Variable>(Src0));
+ int32_t DestReg = this->getDest()->getRegNum();
+ int32_t SrcReg = llvm::cast<Variable>(Src0)->getRegNum();
+ (void)DestReg;
+ (void)SrcReg;
switch (Src0->getType()) {
default:
llvm_unreachable("unexpected source type!");
break;
case IceType_i8:
- assert(llvm::cast<Variable>(Src0)->getRegNum() ==
- InstX86Base<Machine>::Traits::RegisterSet::Reg_al);
- assert(this->getDest()->getRegNum() ==
- InstX86Base<Machine>::Traits::RegisterSet::Reg_ax);
+ assert(SrcReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_al);
+ assert(DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_ax ||
+ DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_ah);
Asm->cbw();
break;
case IceType_i16:
- assert(llvm::cast<Variable>(Src0)->getRegNum() ==
- InstX86Base<Machine>::Traits::RegisterSet::Reg_ax);
- assert(this->getDest()->getRegNum() ==
- InstX86Base<Machine>::Traits::RegisterSet::Reg_dx);
+ assert(SrcReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_ax);
+ assert(DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_dx);
Asm->cwd();
break;
case IceType_i32:
- assert(llvm::cast<Variable>(Src0)->getRegNum() ==
- InstX86Base<Machine>::Traits::RegisterSet::Reg_eax);
- assert(this->getDest()->getRegNum() ==
- InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
+ assert(SrcReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_eax);
+ assert(DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
Asm->cdq();
break;
case IceType_i64:
- assert(this->getDest()->getRegNum() ==
- InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
+ assert(DestReg == InstX86Base<Machine>::Traits::RegisterSet::Reg_edx);
Asm->cqo();
break;
}
} else {
Str << "\tmov"
<< (!isScalarFloatingType(DestTy)
- ? this->getWidthString(SrcTy)
+ ? this->getWidthString(DestTy)
: InstX86Base<Machine>::Traits::TypeAttributes[DestTy]
.SdSsString) << "\t";
}
- // For an integer truncation operation, src is wider than dest. Ideally, we
- // use a mov instruction whose data width matches the narrower dest. This is
- // a problem if e.g. src is a register like esi or si where there is no 8-bit
- // version of the register. To be safe, we instead widen the dest to match
- // src. This works even for stack-allocated dest variables because
- // typeWidthOnStack() pads to a 4-byte boundary even if only a lower portion
- // is used.
+ // For an integer truncation operation, src is wider than dest. In this case,
+ // we use a mov instruction whose data width matches the narrower dest.
// TODO: This assert disallows usages such as copying a floating
// point value between a vector and a scalar (which movss is used for). Clean
// this up.
assert(Func->getTarget()->typeWidthInBytesOnStack(DestTy) ==
Func->getTarget()->typeWidthInBytesOnStack(SrcTy));
- Src->emit(Func);
+ const Operand *NewSrc = Src;
+ if (auto *SrcVar = llvm::dyn_cast<Variable>(Src)) {
+ int32_t NewRegNum = Variable::NoRegister;
+ if (SrcVar->hasReg())
+ NewRegNum = InstX86Base<Machine>::Traits::getGprForType(
+ DestTy, SrcVar->getRegNum());
+ if (SrcTy != DestTy)
+ NewSrc = SrcVar->asType(DestTy, NewRegNum);
+ }
+ NewSrc->emit(Func);
Str << ", ";
- int32_t NewRegNum = Variable::NoRegister;
- if (this->getDest()->hasReg())
- NewRegNum = InstX86Base<Machine>::Traits::getGprForType(
- SrcTy, this->getDest()->getRegNum());
- const Variable *NewDest = SrcTy == DestTy
- ? this->getDest()
- : this->getDest()->asType(SrcTy, NewRegNum);
- NewDest->emit(Func);
+ this->getDest()->emit(Func);
}
template <class Machine>
Machine>::Traits::Assembler::GPREmitterAddrOp GPRAddrEmitter = {
&InstX86Base<Machine>::Traits::Assembler::mov,
&InstX86Base<Machine>::Traits::Assembler::mov};
- // For an integer truncation operation, src is wider than dest. Ideally, we
- // use a mov instruction whose data width matches the narrower dest. This is
- // a problem if e.g. src is a register like esi or si where there is no 8-bit
- // version of the register. To be safe, we instead widen the dest to match
- // src. This works even for stack-allocated dest variables because
- // typeWidthOnStack() pads to a 4-byte boundary even if only a lower portion
- // is used.
+ // For an integer truncation operation, src is wider than dest. In this case,
+ // we use a mov instruction whose data width matches the narrower dest.
// TODO: This assert disallows usages such as copying a floating
// point value between a vector and a scalar (which movss is used for). Clean
// this up.
return;
}
if (isScalarIntegerType(SrcTy)) {
- DestTy = SrcTy;
+ SrcTy = DestTy;
}
emitIASRegOpTyGPR<Machine>(Func, DestTy, Dest, Src, GPRRegEmitter);
return;
Ostream &operator<<(Ostream &Str, const LiveRange &L);
+/// RegClass indicates the physical register class that a Variable may be
+/// register-allocated from. By default, a variable's register class is
+/// directly associated with its type. However, the target lowering may define
+/// additional target-specific register classes by extending the set of enum
+/// values.
+enum RegClass : uint8_t {
+// Define RC_void, RC_i1, RC_i8, etc.
+#define X(tag, sizeLog2, align, elts, elty, str) RC_##tag = IceType_##tag,
+ ICETYPE_TABLE
+#undef X
+ RC_Target,
+ // Leave plenty of space for target-specific values.
+ RC_Max = std::numeric_limits<uint8_t>::max()
+};
+static_assert(RC_Target == static_cast<RegClass>(IceType_NUM),
+ "Expected RC_Target and IceType_NUM to be the same");
+
/// Variable represents an operand that is register-allocated or
/// stack-allocated. If it is register-allocated, it will ultimately have a
/// non-negative RegNum field.
return RegRequirement == RR_MustNotHaveRegister;
}
+ void setRegClass(uint8_t RC) { RegisterClass = static_cast<RegClass>(RC); }
+ RegClass getRegClass() const { return RegisterClass; }
+
LiveRange &getLiveRange() { return Live; }
const LiveRange &getLiveRange() const { return Live; }
void setLiveRange(const LiveRange &Range) { Live = Range; }
protected:
Variable(OperandKind K, Type Ty, SizeT Index)
- : Operand(K, Ty), Number(Index) {
+ : Operand(K, Ty), Number(Index),
+ RegisterClass(static_cast<RegClass>(Ty)) {
Vars = VarsReal;
Vars[0] = this;
NumVars = 1;
/// pointer and other physical registers specifically referenced by name.
bool IgnoreLiveness = false;
RegRequirement RegRequirement = RR_MayHaveRegister;
+ RegClass RegisterClass;
/// RegNum is the allocated register, or NoRegister if it isn't
/// register-allocated.
int32_t RegNum = NoRegister;
Iter.Cur = Unhandled.back();
Unhandled.pop_back();
dumpLiveRangeTrace("\nConsidering ", Iter.Cur);
- Iter.RegMask =
- RegMaskFull & Target->getRegisterSetForType(Iter.Cur->getType());
+ Iter.RegMask = RegMaskFull & Target->getRegistersForVariable(Iter.Cur);
KillsRange.trim(Iter.Cur->getLiveRange().getStart());
// Check for pre-colored ranges. If Cur is pre-colored, it definitely gets
// Disable AllowOverlap if an Active variable, which is not Prefer, shares
// Prefer's register, and has a definition within Cur's live range.
if (Iter.AllowOverlap) {
+ const llvm::SmallBitVector &Aliases = *RegAliases[Iter.PreferReg];
for (const Variable *Item : Active) {
int32_t RegNum = Item->getRegNumTmp();
- // TODO(stichnot): Consider aliases of RegNum. This is probably a
- // correctness issue.
- if (Item != Iter.Prefer && RegNum == Iter.PreferReg &&
+ if (Item != Iter.Prefer && Aliases[RegNum] &&
overlapsDefs(Func, Iter.Cur, Item)) {
Iter.AllowOverlap = false;
dumpDisableOverlap(Func, Item, "Active");
#include "IceDefs.h"
#include "IceInstARM32.def"
+#include "IceOperand.h" // RC_Target
#include "IceTypes.h"
namespace Ice {
static const char *RegNames[];
};
+// Extend enum RegClass with ARM32-specific register classes (if any).
+enum RegClassARM32 : uint8_t { RCARM32_NUM = RC_Target };
+
} // end of namespace Ice
#endif // SUBZERO_SRC_ICEREGISTERSARM32_H
#include "IceDefs.h"
#include "IceInstMIPS32.def"
+#include "IceOperand.h" // RC_Target
#include "IceTypes.h"
namespace Ice {
} // end of namespace RegMIPS32
+// Extend enum RegClass with MIPS32-specific register classes (if any).
+enum RegClassMIPS32 : uint8_t { RCMIPS32_NUM = RC_Target };
+
} // end of namespace Ice
#endif // SUBZERO_SRC_ICEREGISTERSMIPS32_H
virtual llvm::SmallBitVector getRegisterSet(RegSetMask Include,
RegSetMask Exclude) const = 0;
- virtual const llvm::SmallBitVector &getRegisterSetForType(Type Ty) const = 0;
+ virtual const llvm::SmallBitVector &
+ getRegistersForVariable(const Variable *Var) const = 0;
virtual const llvm::SmallBitVector &getAliasesForRegister(SizeT) const = 0;
void regAlloc(RegAllocKind Kind);
IceString getRegName(SizeT RegNum, Type Ty) const override;
llvm::SmallBitVector getRegisterSet(RegSetMask Include,
RegSetMask Exclude) const override;
- const llvm::SmallBitVector &getRegisterSetForType(Type Ty) const override {
- return TypeToRegisterSet[Ty];
+ const llvm::SmallBitVector &
+ getRegistersForVariable(const Variable *Var) const override {
+ RegClass RC = Var->getRegClass();
+ assert(RC < RC_Target);
+ return TypeToRegisterSet[RC];
}
const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override {
return RegisterAliases[Reg];
bool MaybeLeafFunc = true;
size_t SpillAreaSizeBytes = 0;
// TODO(jpp): std::array instead of array.
- static llvm::SmallBitVector TypeToRegisterSet[IceType_NUM];
+ static llvm::SmallBitVector TypeToRegisterSet[RCARM32_NUM];
static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
static llvm::SmallBitVector ScratchRegs;
llvm::SmallBitVector RegsUsed;
IceString getRegName(SizeT RegNum, Type Ty) const override;
llvm::SmallBitVector getRegisterSet(RegSetMask Include,
RegSetMask Exclude) const override;
- const llvm::SmallBitVector &getRegisterSetForType(Type Ty) const override {
- return TypeToRegisterSet[Ty];
+ const llvm::SmallBitVector &
+ getRegistersForVariable(const Variable *Var) const override {
+ RegClass RC = Var->getRegClass();
+ assert(RC < RC_Target);
+ return TypeToRegisterSet[RC];
}
const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override {
return RegisterAliases[Reg];
bool UsesFramePointer = false;
bool NeedsStackAlignment = false;
- static llvm::SmallBitVector TypeToRegisterSet[IceType_NUM];
+ static llvm::SmallBitVector TypeToRegisterSet[RCMIPS32_NUM];
static llvm::SmallBitVector RegisterAliases[RegMIPS32::Reg_NUM];
static llvm::SmallBitVector ScratchRegs;
llvm::SmallBitVector RegsUsed;
const MachineTraits<TargetX8632>::TableTypeX8632AttributesType
MachineTraits<TargetX8632>::TableTypeX8632Attributes[] = {
#define X(tag, elementty, cvt, sdss, pack, width, fld) \
- { elementty } \
+ { IceType_##elementty } \
,
ICETYPEX8632_TABLE
#undef X
const char *MachineTraits<TargetX8632>::TargetName = "X8632";
template <>
-std::array<llvm::SmallBitVector, IceType_NUM>
+std::array<llvm::SmallBitVector, RCX86_NUM>
TargetX86Base<TargetX8632>::TypeToRegisterSet = {};
template <>
};
// Define a set of constants based on high-level table entries.
#define X(tag, sizeLog2, align, elts, elty, str) \
- static const int _table1_##tag = tag;
+ static const int _table1_##tag = IceType_##tag;
ICETYPE_TABLE
#undef X
// Define a set of constants based on low-level table entries, and ensure the
#include "IceInstX8632.def"
#include "IceOperand.h"
#include "IceRegistersX8632.h"
-#include "IceTargetLoweringX8632.def"
#include "IceTargetLowering.h"
+#include "IceTargetLoweringX8632.def"
+#include "IceTargetLoweringX86RegClass.h"
#include <array>
}
static void initRegisterSet(
- std::array<llvm::SmallBitVector, IceType_NUM> *TypeToRegisterSet,
+ std::array<llvm::SmallBitVector, RCX86_NUM> *TypeToRegisterSet,
std::array<llvm::SmallBitVector, RegisterSet::Reg_NUM> *RegisterAliases,
llvm::SmallBitVector *ScratchRegs) {
llvm::SmallBitVector IntegerRegistersI32(RegisterSet::Reg_NUM);
llvm::SmallBitVector IntegerRegistersI8(RegisterSet::Reg_NUM);
llvm::SmallBitVector FloatRegisters(RegisterSet::Reg_NUM);
llvm::SmallBitVector VectorRegisters(RegisterSet::Reg_NUM);
+ llvm::SmallBitVector Trunc64To8Registers(RegisterSet::Reg_NUM);
+ llvm::SmallBitVector Trunc32To8Registers(RegisterSet::Reg_NUM);
+ llvm::SmallBitVector Trunc16To8Registers(RegisterSet::Reg_NUM);
+ llvm::SmallBitVector Trunc8RcvrRegisters(RegisterSet::Reg_NUM);
+ llvm::SmallBitVector AhRcvrRegisters(RegisterSet::Reg_NUM);
llvm::SmallBitVector InvalidRegisters(RegisterSet::Reg_NUM);
ScratchRegs->resize(RegisterSet::Reg_NUM);
#define X(val, encode, name, base, scratch, preserved, stackptr, frameptr, \
(IntegerRegistersI8)[RegisterSet::val] = is8; \
(FloatRegisters)[RegisterSet::val] = isXmm; \
(VectorRegisters)[RegisterSet::val] = isXmm; \
+ (Trunc64To8Registers)[RegisterSet::val] = is64To8; \
+ (Trunc32To8Registers)[RegisterSet::val] = is32To8; \
+ (Trunc16To8Registers)[RegisterSet::val] = is16To8; \
+ (Trunc8RcvrRegisters)[RegisterSet::val] = isTrunc8Rcvr; \
+ (AhRcvrRegisters)[RegisterSet::val] = isAhRcvr; \
(*RegisterAliases)[RegisterSet::val].resize(RegisterSet::Reg_NUM); \
for (SizeT RegAlias : aliases) { \
assert(!(*RegisterAliases)[RegisterSet::val][RegAlias] && \
REGX8632_TABLE;
#undef X
- (*TypeToRegisterSet)[IceType_void] = InvalidRegisters;
- (*TypeToRegisterSet)[IceType_i1] = IntegerRegistersI8;
- (*TypeToRegisterSet)[IceType_i8] = IntegerRegistersI8;
- (*TypeToRegisterSet)[IceType_i16] = IntegerRegistersI16;
- (*TypeToRegisterSet)[IceType_i32] = IntegerRegistersI32;
- (*TypeToRegisterSet)[IceType_i64] = IntegerRegistersI32;
- (*TypeToRegisterSet)[IceType_f32] = FloatRegisters;
- (*TypeToRegisterSet)[IceType_f64] = FloatRegisters;
- (*TypeToRegisterSet)[IceType_v4i1] = VectorRegisters;
- (*TypeToRegisterSet)[IceType_v8i1] = VectorRegisters;
- (*TypeToRegisterSet)[IceType_v16i1] = VectorRegisters;
- (*TypeToRegisterSet)[IceType_v16i8] = VectorRegisters;
- (*TypeToRegisterSet)[IceType_v8i16] = VectorRegisters;
- (*TypeToRegisterSet)[IceType_v4i32] = VectorRegisters;
- (*TypeToRegisterSet)[IceType_v4f32] = VectorRegisters;
+ (*TypeToRegisterSet)[RC_void] = InvalidRegisters;
+ (*TypeToRegisterSet)[RC_i1] = IntegerRegistersI8;
+ (*TypeToRegisterSet)[RC_i8] = IntegerRegistersI8;
+ (*TypeToRegisterSet)[RC_i16] = IntegerRegistersI16;
+ (*TypeToRegisterSet)[RC_i32] = IntegerRegistersI32;
+ (*TypeToRegisterSet)[RC_i64] = IntegerRegistersI32;
+ (*TypeToRegisterSet)[RC_f32] = FloatRegisters;
+ (*TypeToRegisterSet)[RC_f64] = FloatRegisters;
+ (*TypeToRegisterSet)[RC_v4i1] = VectorRegisters;
+ (*TypeToRegisterSet)[RC_v8i1] = VectorRegisters;
+ (*TypeToRegisterSet)[RC_v16i1] = VectorRegisters;
+ (*TypeToRegisterSet)[RC_v16i8] = VectorRegisters;
+ (*TypeToRegisterSet)[RC_v8i16] = VectorRegisters;
+ (*TypeToRegisterSet)[RC_v4i32] = VectorRegisters;
+ (*TypeToRegisterSet)[RC_v4f32] = VectorRegisters;
+ (*TypeToRegisterSet)[RCX86_Is64To8] = Trunc64To8Registers;
+ (*TypeToRegisterSet)[RCX86_Is32To8] = Trunc32To8Registers;
+ (*TypeToRegisterSet)[RCX86_Is16To8] = Trunc16To8Registers;
+ (*TypeToRegisterSet)[RCX86_IsTrunc8Rcvr] = Trunc8RcvrRegisters;
+ (*TypeToRegisterSet)[RCX86_IsAhRcvr] = AhRcvrRegisters;
}
static llvm::SmallBitVector
Index |= (is8 << (AttrKey++)); \
Index |= (is16 << (AttrKey++)); \
Index |= (is32 << (AttrKey++)); \
+ Index |= (is64 << (AttrKey++)); \
Index |= (isXmm << (AttrKey++)); \
+ Index |= (is16To8 << (AttrKey++)); \
+ Index |= (is32To8 << (AttrKey++)); \
+ Index |= (is64To8 << (AttrKey++)); \
+ Index |= (isTrunc8Rcvr << (AttrKey++)); \
/* val is assigned to an equivalence class based on its properties. */ \
EquivalenceClasses[Index].push_back(RegisterSet::val); \
}
const MachineTraits<TargetX8664>::TableTypeX8664AttributesType
MachineTraits<TargetX8664>::TableTypeX8664Attributes[] = {
#define X(tag, elementty, cvt, sdss, pack, width, fld) \
- { elementty } \
+ { IceType_##elementty } \
,
ICETYPEX8664_TABLE
#undef X
const char *MachineTraits<TargetX8664>::TargetName = "X8664";
template <>
-std::array<llvm::SmallBitVector, IceType_NUM>
+std::array<llvm::SmallBitVector, RCX86_NUM>
TargetX86Base<TargetX8664>::TypeToRegisterSet = {};
template <>
};
// Define a set of constants based on high-level table entries.
#define X(tag, sizeLog2, align, elts, elty, str) \
- static const int _table1_##tag = tag;
+ static const int _table1_##tag = IceType_##tag;
ICETYPE_TABLE
#undef X
// Define a set of constants based on low-level table entries, and ensure the
#include "IceRegistersX8664.h"
#include "IceTargetLowering.h"
#include "IceTargetLoweringX8664.def"
+#include "IceTargetLoweringX86RegClass.h"
#include <array>
static int32_t getGprForType(Type, int32_t RegNum) { return RegNum; }
static void initRegisterSet(
- std::array<llvm::SmallBitVector, IceType_NUM> *TypeToRegisterSet,
+ std::array<llvm::SmallBitVector, RCX86_NUM> *TypeToRegisterSet,
std::array<llvm::SmallBitVector, RegisterSet::Reg_NUM> *RegisterAliases,
llvm::SmallBitVector *ScratchRegs) {
llvm::SmallBitVector IntegerRegistersI64(RegisterSet::Reg_NUM);
llvm::SmallBitVector IntegerRegistersI8(RegisterSet::Reg_NUM);
llvm::SmallBitVector FloatRegisters(RegisterSet::Reg_NUM);
llvm::SmallBitVector VectorRegisters(RegisterSet::Reg_NUM);
+ llvm::SmallBitVector Trunc64To8Registers(RegisterSet::Reg_NUM);
+ llvm::SmallBitVector Trunc32To8Registers(RegisterSet::Reg_NUM);
+ llvm::SmallBitVector Trunc16To8Registers(RegisterSet::Reg_NUM);
+ llvm::SmallBitVector Trunc8RcvrRegisters(RegisterSet::Reg_NUM);
+ llvm::SmallBitVector AhRcvrRegisters(RegisterSet::Reg_NUM);
llvm::SmallBitVector InvalidRegisters(RegisterSet::Reg_NUM);
ScratchRegs->resize(RegisterSet::Reg_NUM);
(IntegerRegistersI8)[RegisterSet::val] = is8; \
(FloatRegisters)[RegisterSet::val] = isXmm; \
(VectorRegisters)[RegisterSet::val] = isXmm; \
+ (Trunc64To8Registers)[RegisterSet::val] = is64To8; \
+ (Trunc32To8Registers)[RegisterSet::val] = is32To8; \
+ (Trunc16To8Registers)[RegisterSet::val] = is16To8; \
+ (Trunc8RcvrRegisters)[RegisterSet::val] = isTrunc8Rcvr; \
+ (AhRcvrRegisters)[RegisterSet::val] = isAhRcvr; \
(*RegisterAliases)[RegisterSet::val].resize(RegisterSet::Reg_NUM); \
for (SizeT RegAlias : aliases) { \
assert(!(*RegisterAliases)[RegisterSet::val][RegAlias] && \
REGX8664_TABLE;
#undef X
- (*TypeToRegisterSet)[IceType_void] = InvalidRegisters;
- (*TypeToRegisterSet)[IceType_i1] = IntegerRegistersI8;
- (*TypeToRegisterSet)[IceType_i8] = IntegerRegistersI8;
- (*TypeToRegisterSet)[IceType_i16] = IntegerRegistersI16;
- (*TypeToRegisterSet)[IceType_i32] = IntegerRegistersI32;
- (*TypeToRegisterSet)[IceType_i64] = IntegerRegistersI64;
- (*TypeToRegisterSet)[IceType_f32] = FloatRegisters;
- (*TypeToRegisterSet)[IceType_f64] = FloatRegisters;
- (*TypeToRegisterSet)[IceType_v4i1] = VectorRegisters;
- (*TypeToRegisterSet)[IceType_v8i1] = VectorRegisters;
- (*TypeToRegisterSet)[IceType_v16i1] = VectorRegisters;
- (*TypeToRegisterSet)[IceType_v16i8] = VectorRegisters;
- (*TypeToRegisterSet)[IceType_v8i16] = VectorRegisters;
- (*TypeToRegisterSet)[IceType_v4i32] = VectorRegisters;
- (*TypeToRegisterSet)[IceType_v4f32] = VectorRegisters;
+ (*TypeToRegisterSet)[RC_void] = InvalidRegisters;
+ (*TypeToRegisterSet)[RC_i1] = IntegerRegistersI8;
+ (*TypeToRegisterSet)[RC_i8] = IntegerRegistersI8;
+ (*TypeToRegisterSet)[RC_i16] = IntegerRegistersI16;
+ (*TypeToRegisterSet)[RC_i32] = IntegerRegistersI32;
+ (*TypeToRegisterSet)[RC_i64] = IntegerRegistersI64;
+ (*TypeToRegisterSet)[RC_f32] = FloatRegisters;
+ (*TypeToRegisterSet)[RC_f64] = FloatRegisters;
+ (*TypeToRegisterSet)[RC_v4i1] = VectorRegisters;
+ (*TypeToRegisterSet)[RC_v8i1] = VectorRegisters;
+ (*TypeToRegisterSet)[RC_v16i1] = VectorRegisters;
+ (*TypeToRegisterSet)[RC_v16i8] = VectorRegisters;
+ (*TypeToRegisterSet)[RC_v8i16] = VectorRegisters;
+ (*TypeToRegisterSet)[RC_v4i32] = VectorRegisters;
+ (*TypeToRegisterSet)[RC_v4f32] = VectorRegisters;
+ (*TypeToRegisterSet)[RCX86_Is64To8] = Trunc64To8Registers;
+ (*TypeToRegisterSet)[RCX86_Is32To8] = Trunc32To8Registers;
+ (*TypeToRegisterSet)[RCX86_Is16To8] = Trunc16To8Registers;
+ (*TypeToRegisterSet)[RCX86_IsTrunc8Rcvr] = Trunc8RcvrRegisters;
+ (*TypeToRegisterSet)[RCX86_IsAhRcvr] = AhRcvrRegisters;
}
static llvm::SmallBitVector
Index |= (is32 << (AttrKey++)); \
Index |= (is64 << (AttrKey++)); \
Index |= (isXmm << (AttrKey++)); \
+ Index |= (is16To8 << (AttrKey++)); \
+ Index |= (is32To8 << (AttrKey++)); \
+ Index |= (is64To8 << (AttrKey++)); \
+ Index |= (isTrunc8Rcvr << (AttrKey++)); \
/* val is assigned to an equivalence class based on its properties. */ \
EquivalenceClasses[Index].push_back(RegisterSet::val); \
}
#include "IceInst.h"
#include "IceSwitchLowering.h"
#include "IceTargetLowering.h"
+#include "IceTargetLoweringX86RegClass.h"
#include "IceUtils.h"
#include <array>
IceString getRegName(SizeT RegNum, Type Ty) const override;
llvm::SmallBitVector getRegisterSet(RegSetMask Include,
RegSetMask Exclude) const override;
- const llvm::SmallBitVector &getRegisterSetForType(Type Ty) const override {
- return TypeToRegisterSet[Ty];
+ const llvm::SmallBitVector &
+ getRegistersForVariable(const Variable *Var) const override {
+ RegClass RC = Var->getRegClass();
+ assert(static_cast<RegClassX86>(RC) < RCX86_NUM);
+ return TypeToRegisterSet[RC];
}
const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override {
static Type firstTypeThatFitsSize(uint32_t Size,
uint32_t MaxSize = NoSizeLimit);
+ Variable *copyToReg8(Operand *Src, int32_t RegNum = Variable::NoRegister);
Variable *copyToReg(Operand *Src, int32_t RegNum = Variable::NoRegister);
/// \name Returns a vector in a register with the given constant entries.
bool NeedsStackAlignment = false;
size_t SpillAreaSizeBytes = 0;
size_t FixedAllocaSizeBytes = 0;
- static std::array<llvm::SmallBitVector, IceType_NUM> TypeToRegisterSet;
+ static std::array<llvm::SmallBitVector, RCX86_NUM> TypeToRegisterSet;
static std::array<llvm::SmallBitVector, Traits::RegisterSet::Reg_NUM>
RegisterAliases;
static llvm::SmallBitVector ScratchRegs;
// t1:ecx = c.lo & 0xff
// t2 = b.lo
// t3 = b.hi
- T_1 = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
- _mov(T_1, Src1Lo);
+ T_1 = copyToReg8(Src1Lo, Traits::RegisterSet::Reg_cl);
_mov(T_2, Src0Lo);
_mov(T_3, Src0Hi);
switch (Op) {
template <class Machine>
void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
Variable *Dest = Inst->getDest();
+ Type Ty = Dest->getType();
Operand *Src0 = legalize(Inst->getSrc(0));
Operand *Src1 = legalize(Inst->getSrc(1));
if (Inst->isCommutative()) {
assert(SwapCount <= 1);
(void)SwapCount;
}
- if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Ty == IceType_i64) {
// These x86-32 helper-call-involved instructions are lowered in this
// separate switch. This is because loOperand() and hiOperand() may insert
// redundant instructions for constant blinding and pooling. Such redundant
}
return;
}
- if (isVectorType(Dest->getType())) {
+ if (isVectorType(Ty)) {
// TODO: Trap on integer divide and integer modulo by zero. See:
// https://code.google.com/p/nativeclient/issues/detail?id=3899
if (llvm::isa<typename Traits::X86OperandMem>(Src1))
llvm_unreachable("Unknown arithmetic operator");
break;
case InstArithmetic::Add: {
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(Ty);
_movp(T, Src0);
_padd(T, Src1);
_movp(Dest, T);
} break;
case InstArithmetic::And: {
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(Ty);
_movp(T, Src0);
_pand(T, Src1);
_movp(Dest, T);
} break;
case InstArithmetic::Or: {
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(Ty);
_movp(T, Src0);
_por(T, Src1);
_movp(Dest, T);
} break;
case InstArithmetic::Xor: {
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(Ty);
_movp(T, Src0);
_pxor(T, Src1);
_movp(Dest, T);
} break;
case InstArithmetic::Sub: {
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(Ty);
_movp(T, Src0);
_psub(T, Src1);
_movp(Dest, T);
} break;
case InstArithmetic::Mul: {
- bool TypesAreValidForPmull =
- Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
+ bool TypesAreValidForPmull = Ty == IceType_v4i32 || Ty == IceType_v8i16;
bool InstructionSetIsValidForPmull =
- Dest->getType() == IceType_v8i16 || InstructionSet >= Traits::SSE4_1;
+ Ty == IceType_v8i16 || InstructionSet >= Traits::SSE4_1;
if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(Ty);
_movp(T, Src0);
_pmull(T, Src0 == Src1 ? T : Src1);
_movp(Dest, T);
- } else if (Dest->getType() == IceType_v4i32) {
+ } else if (Ty == IceType_v4i32) {
// Lowering sequence:
// Note: The mask arguments have index 0 on the left.
//
_shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
_pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));
_movp(Dest, T4);
- } else if (Dest->getType() == IceType_v16i8) {
+ } else if (Ty == IceType_v16i8) {
scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
} else {
llvm::report_fatal_error("Invalid vector multiply type");
scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
break;
case InstArithmetic::Fadd: {
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(Ty);
_movp(T, Src0);
_addps(T, Src1);
_movp(Dest, T);
} break;
case InstArithmetic::Fsub: {
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(Ty);
_movp(T, Src0);
_subps(T, Src1);
_movp(Dest, T);
} break;
case InstArithmetic::Fmul: {
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(Ty);
_movp(T, Src0);
_mulps(T, Src0 == Src1 ? T : Src1);
_movp(Dest, T);
} break;
case InstArithmetic::Fdiv: {
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(Ty);
_movp(T, Src0);
_divps(T, Src1);
_movp(Dest, T);
}
// The 8-bit version of imul only allows the form "imul r/m8" where T must
// be in al.
- if (isByteSizedArithType(Dest->getType())) {
+ if (isByteSizedArithType(Ty)) {
_mov(T, Src0, Traits::RegisterSet::Reg_al);
Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
_imul(T, Src0 == Src1 ? T : Src1);
_mov(Dest, T);
} else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) {
- T = makeReg(Dest->getType());
+ T = makeReg(Ty);
_imul_imm(T, Src0, ImmConst);
_mov(Dest, T);
} else {
break;
case InstArithmetic::Shl:
_mov(T, Src0);
- if (!llvm::isa<ConstantInteger32>(Src1)) {
- Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
- _mov(Cl, Src1);
- Src1 = Cl;
- }
+ if (!llvm::isa<ConstantInteger32>(Src1))
+ Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
_shl(T, Src1);
_mov(Dest, T);
break;
case InstArithmetic::Lshr:
_mov(T, Src0);
- if (!llvm::isa<ConstantInteger32>(Src1)) {
- Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
- _mov(Cl, Src1);
- Src1 = Cl;
- }
+ if (!llvm::isa<ConstantInteger32>(Src1))
+ Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
_shr(T, Src1);
_mov(Dest, T);
break;
case InstArithmetic::Ashr:
_mov(T, Src0);
- if (!llvm::isa<ConstantInteger32>(Src1)) {
- Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
- _mov(Cl, Src1);
- Src1 = Cl;
- }
+ if (!llvm::isa<ConstantInteger32>(Src1))
+ Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
_sar(T, Src1);
_mov(Dest, T);
break;
- case InstArithmetic::Udiv:
+ case InstArithmetic::Udiv: {
// div and idiv are the few arithmetic operators that do not allow
// immediates as the operand.
Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
- if (isByteSizedArithType(Dest->getType())) {
- // For 8-bit unsigned division we need to zero-extend al into ah. A mov
- // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64
- // assembler refuses to encode %ah (encoding %spl with a REX prefix
- // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah
- // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and
- // d[%lh], which means the X86 target lowering (and the register
- // allocator) would have to be aware of this restriction. For now, we
- // simply zero %eax completely, and move the dividend into %al.
- Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
- Context.insert(InstFakeDef::create(Func, T_eax));
- _xor(T_eax, T_eax);
- _mov(T, Src0, Traits::RegisterSet::Reg_al);
- _div(T, Src1, T);
- _mov(Dest, T);
- Context.insert(InstFakeUse::create(Func, T_eax));
- } else {
- Type Ty = Dest->getType();
- uint32_t Eax = Traits::RegisterSet::Reg_eax;
- uint32_t Edx = Traits::RegisterSet::Reg_edx;
- switch (Ty) {
- default:
- llvm_unreachable("Bad type for udiv");
- // fallthrough
- case IceType_i32:
- break;
- case IceType_i16:
- Eax = Traits::RegisterSet::Reg_ax;
- Edx = Traits::RegisterSet::Reg_dx;
- break;
- }
- Constant *Zero = Ctx->getConstantZero(Ty);
- _mov(T, Src0, Eax);
- _mov(T_edx, Zero, Edx);
- _div(T, Src1, T_edx);
- _mov(Dest, T);
+ uint32_t Eax = Traits::RegisterSet::Reg_eax;
+ uint32_t Edx = Traits::RegisterSet::Reg_edx;
+ switch (Ty) {
+ default:
+ llvm_unreachable("Bad type for udiv");
+ // fallthrough
+ case IceType_i32:
+ break;
+ case IceType_i16:
+ Eax = Traits::RegisterSet::Reg_ax;
+ Edx = Traits::RegisterSet::Reg_dx;
+ break;
+ case IceType_i8:
+ Eax = Traits::RegisterSet::Reg_al;
+ Edx = Traits::RegisterSet::Reg_ah;
+ break;
}
- break;
+ _mov(T, Src0, Eax);
+ _mov(T_edx, Ctx->getConstantZero(Ty), Edx);
+ _div(T, Src1, T_edx);
+ _mov(Dest, T);
+ } break;
case InstArithmetic::Sdiv:
// TODO(stichnot): Enable this after doing better performance and cross
// testing.
uint32_t UDivisor = static_cast<uint32_t>(Divisor);
if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
uint32_t LogDiv = llvm::Log2_32(UDivisor);
- Type Ty = Dest->getType();
// LLVM does the following for dest=src/(1<<log):
// t=src
// sar t,typewidth-1 // -1 if src is negative, 0 if not
}
}
Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
- switch (Type Ty = Dest->getType()) {
+ switch (Ty) {
default:
llvm_unreachable("Bad type for sdiv");
// fallthrough
_idiv(T, Src1, T_edx);
_mov(Dest, T);
break;
- case InstArithmetic::Urem:
+ case InstArithmetic::Urem: {
Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
- if (isByteSizedArithType(Dest->getType())) {
- Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
- Context.insert(InstFakeDef::create(Func, T_eax));
- _xor(T_eax, T_eax);
- _mov(T, Src0, Traits::RegisterSet::Reg_al);
- _div(T, Src1, T);
- // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't
- // mov %ah, %al because it would make x86-64 codegen more complicated. If
- // this ever becomes a problem we can introduce a pseudo rem instruction
- // that returns the remainder in %al directly (and uses a mov for copying
- // %ah to %al.)
- static constexpr uint8_t AlSizeInBits = 8;
- _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
- _mov(Dest, T);
- Context.insert(InstFakeUse::create(Func, T_eax));
- } else {
- Type Ty = Dest->getType();
- uint32_t Eax = Traits::RegisterSet::Reg_eax;
- uint32_t Edx = Traits::RegisterSet::Reg_edx;
- switch (Ty) {
- default:
- llvm_unreachable("Bad type for urem");
- // fallthrough
- case IceType_i32:
- break;
- case IceType_i16:
- Eax = Traits::RegisterSet::Reg_ax;
- Edx = Traits::RegisterSet::Reg_dx;
- break;
- }
- Constant *Zero = Ctx->getConstantZero(Ty);
- T_edx = makeReg(Dest->getType(), Edx);
- _mov(T_edx, Zero);
- _mov(T, Src0, Eax);
- _div(T_edx, Src1, T);
- _mov(Dest, T_edx);
+ uint32_t Eax = Traits::RegisterSet::Reg_eax;
+ uint32_t Edx = Traits::RegisterSet::Reg_edx;
+ switch (Ty) {
+ default:
+ llvm_unreachable("Bad type for urem");
+ // fallthrough
+ case IceType_i32:
+ break;
+ case IceType_i16:
+ Eax = Traits::RegisterSet::Reg_ax;
+ Edx = Traits::RegisterSet::Reg_dx;
+ break;
+ case IceType_i8:
+ Eax = Traits::RegisterSet::Reg_al;
+ Edx = Traits::RegisterSet::Reg_ah;
+ break;
}
- break;
- case InstArithmetic::Srem:
+ T_edx = makeReg(Ty, Edx);
+ _mov(T_edx, Ctx->getConstantZero(Ty));
+ _mov(T, Src0, Eax);
+ _div(T_edx, Src1, T);
+ _mov(Dest, T_edx);
+ } break;
+ case InstArithmetic::Srem: {
// TODO(stichnot): Enable this after doing better performance and cross
// testing.
if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
uint32_t UDivisor = static_cast<uint32_t>(Divisor);
if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
uint32_t LogDiv = llvm::Log2_32(UDivisor);
- Type Ty = Dest->getType();
// LLVM does the following for dest=src%(1<<log):
// t=src
// sar t,typewidth-1 // -1 if src is negative, 0 if not
}
}
Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
- switch (Type Ty = Dest->getType()) {
+ uint32_t Eax = Traits::RegisterSet::Reg_eax;
+ uint32_t Edx = Traits::RegisterSet::Reg_edx;
+ switch (Ty) {
default:
llvm_unreachable("Bad type for srem");
// fallthrough
case IceType_i32:
- T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);
- _mov(T, Src0, Traits::RegisterSet::Reg_eax);
- _cbwdq(T_edx, T);
- _idiv(T_edx, Src1, T);
- _mov(Dest, T_edx);
break;
case IceType_i16:
- T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);
- _mov(T, Src0, Traits::RegisterSet::Reg_ax);
- _cbwdq(T_edx, T);
- _idiv(T_edx, Src1, T);
- _mov(Dest, T_edx);
+ Eax = Traits::RegisterSet::Reg_ax;
+ Edx = Traits::RegisterSet::Reg_dx;
break;
case IceType_i8:
- T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);
- // TODO(stichnot): Use register ah for T_edx, and remove the _shr().
- // T_edx = makeReg(Ty, Traits::RegisterSet::Reg_ah);
- _mov(T, Src0, Traits::RegisterSet::Reg_al);
- _cbwdq(T_edx, T);
- _idiv(T_edx, Src1, T);
- static constexpr uint8_t AlSizeInBits = 8;
- _shr(T_edx, Ctx->getConstantInt8(AlSizeInBits));
- _mov(Dest, T_edx);
+ Eax = Traits::RegisterSet::Reg_al;
+ Edx = Traits::RegisterSet::Reg_ah;
break;
}
- break;
+ T_edx = makeReg(Ty, Edx);
+ _mov(T, Src0, Eax);
+ _cbwdq(T_edx, T);
+ _idiv(T_edx, Src1, T);
+ _mov(Dest, T_edx);
+ } break;
case InstArithmetic::Fadd:
_mov(T, Src0);
_addss(T, Src1);
break;
case InstArithmetic::Frem: {
constexpr SizeT MaxSrcs = 2;
- Type Ty = Dest->getType();
InstCall *Call = makeHelperCall(
isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
Call->addArg(Src0);
// a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
InstCast::OpKind CastKind = Inst->getCastKind();
Variable *Dest = Inst->getDest();
+ Type DestTy = Dest->getType();
switch (CastKind) {
default:
Func->setError("Cast type not supported");
// we're unlikely to see something like that in the bitcode that the
// optimizer wouldn't have already taken care of.
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
- if (isVectorType(Dest->getType())) {
- Type DestTy = Dest->getType();
+ if (isVectorType(DestTy)) {
if (DestTy == IceType_v16i8) {
// onemask = materialize(1,1,...); dst = (src & onemask) > 0
- Variable *OneMask = makeVectorOfOnes(Dest->getType());
+ Variable *OneMask = makeVectorOfOnes(DestTy);
Variable *T = makeReg(DestTy);
_movp(T, Src0RM);
_pand(T, OneMask);
- Variable *Zeros = makeVectorOfZeros(Dest->getType());
+ Variable *Zeros = makeVectorOfZeros(DestTy);
_pcmpgt(T, Zeros);
_movp(Dest, T);
} else {
_psra(T, ShiftConstant);
_movp(Dest, T);
}
- } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
+ } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
// t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
Constant *Shift = Ctx->getConstantInt32(31);
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
// shl t1, dst_bitwidth - 1
// sar t1, dst_bitwidth - 1
// dst = t1
- size_t DestBits =
- Traits::X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
+ size_t DestBits = Traits::X86_CHAR_BIT * typeWidthInBytes(DestTy);
Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
- Variable *T = makeReg(Dest->getType());
- if (typeWidthInBytes(Dest->getType()) <=
- typeWidthInBytes(Src0RM->getType())) {
+ Variable *T = makeReg(DestTy);
+ if (typeWidthInBytes(DestTy) <= typeWidthInBytes(Src0RM->getType())) {
_mov(T, Src0RM);
} else {
// Widen the source using movsx or movzx. (It doesn't matter which one,
_mov(Dest, T);
} else {
// t1 = movsx src; dst = t1
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(DestTy);
_movsx(T, Src0RM);
_mov(Dest, T);
}
}
case InstCast::Zext: {
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
- if (isVectorType(Dest->getType())) {
+ if (isVectorType(DestTy)) {
// onemask = materialize(1,1,...); dest = onemask & src
- Type DestTy = Dest->getType();
Variable *OneMask = makeVectorOfOnes(DestTy);
Variable *T = makeReg(DestTy);
_movp(T, Src0RM);
_pand(T, OneMask);
_movp(Dest, T);
- } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
+ } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
// t1=movzx src; dst.lo=t1; dst.hi=0
Constant *Zero = Ctx->getConstantZero(IceType_i32);
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
_mov(DestHi, Zero);
} else if (Src0RM->getType() == IceType_i1) {
// t = Src0RM; Dest = t
- Type DestTy = Dest->getType();
Variable *T = nullptr;
if (DestTy == IceType_i8) {
_mov(T, Src0RM);
_mov(Dest, T);
} else {
// t1 = movzx src; dst = t1
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(DestTy);
_movzx(T, Src0RM);
_mov(Dest, T);
}
break;
}
case InstCast::Trunc: {
- if (isVectorType(Dest->getType())) {
+ if (isVectorType(DestTy)) {
// onemask = materialize(1,1,...); dst = src & onemask
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Type Src0Ty = Src0RM->getType();
Variable *OneMask = makeVectorOfOnes(Src0Ty);
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(DestTy);
_movp(T, Src0RM);
_pand(T, OneMask);
_movp(Dest, T);
+ } else if (DestTy == IceType_i1 || DestTy == IceType_i8) {
+ // Make sure we truncate from and into valid registers.
+ Operand *Src0 = legalizeUndef(Inst->getSrc(0));
+ if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
+ Src0 = loOperand(Src0);
+ Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
+ Variable *T = copyToReg8(Src0RM);
+ if (DestTy == IceType_i1)
+ _and(T, Ctx->getConstantInt1(1));
+ _mov(Dest, T);
} else {
Operand *Src0 = legalizeUndef(Inst->getSrc(0));
if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
Src0 = loOperand(Src0);
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
// t1 = trunc Src0RM; Dest = t1
- Variable *T = nullptr;
+ Variable *T = makeReg(DestTy);
_mov(T, Src0RM);
- if (Dest->getType() == IceType_i1)
- _and(T, Ctx->getConstantInt1(1));
_mov(Dest, T);
}
break;
case InstCast::Fpext: {
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
// t1 = cvt Src0RM; Dest = t1
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(DestTy);
_cvt(T, Src0RM, Traits::Insts::Cvt::Float2float);
_mov(Dest, T);
break;
}
case InstCast::Fptosi:
- if (isVectorType(Dest->getType())) {
- assert(Dest->getType() == IceType_v4i32 &&
+ if (isVectorType(DestTy)) {
+ assert(DestTy == IceType_v4i32 &&
Inst->getSrc(0)->getType() == IceType_v4f32);
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
Src0RM = legalizeToReg(Src0RM);
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(DestTy);
_cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
_movp(Dest, T);
- } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
+ } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
constexpr SizeT MaxSrcs = 1;
Type SrcType = Inst->getSrc(0)->getType();
InstCall *Call =
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
// t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
Variable *T_1 = nullptr;
- if (Traits::Is64Bit && Dest->getType() == IceType_i64) {
+ if (Traits::Is64Bit && DestTy == IceType_i64) {
T_1 = makeReg(IceType_i64);
} else {
- assert(Dest->getType() != IceType_i64);
+ assert(DestTy != IceType_i64);
T_1 = makeReg(IceType_i32);
}
// cvt() requires its integer argument to be a GPR.
- Variable *T_2 = makeReg(Dest->getType());
+ Variable *T_2 = makeReg(DestTy);
+ if (isByteSizedType(DestTy)) {
+ assert(T_1->getType() == IceType_i32);
+ T_1->setRegClass(RCX86_Is32To8);
+ T_2->setRegClass(RCX86_IsTrunc8Rcvr);
+ }
_cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
_mov(T_2, T_1); // T_1 and T_2 may have different integer types
- if (Dest->getType() == IceType_i1)
+ if (DestTy == IceType_i1)
_and(T_2, Ctx->getConstantInt1(1));
_mov(Dest, T_2);
}
break;
case InstCast::Fptoui:
- if (isVectorType(Dest->getType())) {
- assert(Dest->getType() == IceType_v4i32 &&
+ if (isVectorType(DestTy)) {
+ assert(DestTy == IceType_v4i32 &&
Inst->getSrc(0)->getType() == IceType_v4f32);
constexpr SizeT MaxSrcs = 1;
InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);
Call->addArg(Inst->getSrc(0));
lowerCall(Call);
- } else if (Dest->getType() == IceType_i64 ||
- (!Traits::Is64Bit && Dest->getType() == IceType_i32)) {
+ } else if (DestTy == IceType_i64 ||
+ (!Traits::Is64Bit && DestTy == IceType_i32)) {
// Use a helper for both x86-32 and x86-64.
constexpr SizeT MaxSrcs = 1;
- Type DestType = Dest->getType();
Type SrcType = Inst->getSrc(0)->getType();
IceString TargetString;
if (Traits::Is64Bit) {
TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
: H_fptoui_f64_i64;
- } else if (isInt32Asserting32Or64(DestType)) {
+ } else if (isInt32Asserting32Or64(DestTy)) {
TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
: H_fptoui_f64_i32;
} else {
} else {
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
// t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
- assert(Dest->getType() != IceType_i64);
+ assert(DestTy != IceType_i64);
Variable *T_1 = nullptr;
- if (Traits::Is64Bit && Dest->getType() == IceType_i32) {
+ if (Traits::Is64Bit && DestTy == IceType_i32) {
T_1 = makeReg(IceType_i64);
} else {
- assert(Dest->getType() != IceType_i32);
+ assert(DestTy != IceType_i32);
T_1 = makeReg(IceType_i32);
}
- Variable *T_2 = makeReg(Dest->getType());
+ Variable *T_2 = makeReg(DestTy);
+ if (isByteSizedType(DestTy)) {
+ assert(T_1->getType() == IceType_i32);
+ T_1->setRegClass(RCX86_Is32To8);
+ T_2->setRegClass(RCX86_IsTrunc8Rcvr);
+ }
_cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
_mov(T_2, T_1); // T_1 and T_2 may have different integer types
- if (Dest->getType() == IceType_i1)
+ if (DestTy == IceType_i1)
_and(T_2, Ctx->getConstantInt1(1));
_mov(Dest, T_2);
}
break;
case InstCast::Sitofp:
- if (isVectorType(Dest->getType())) {
- assert(Dest->getType() == IceType_v4f32 &&
+ if (isVectorType(DestTy)) {
+ assert(DestTy == IceType_v4f32 &&
Inst->getSrc(0)->getType() == IceType_v4i32);
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
Src0RM = legalizeToReg(Src0RM);
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(DestTy);
_cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
_movp(Dest, T);
} else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {
// Use a helper for x86-32.
constexpr SizeT MaxSrcs = 1;
- Type DestType = Dest->getType();
InstCall *Call =
- makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32
- : H_sitofp_i64_f64,
+ makeHelperCall(isFloat32Asserting32Or64(DestTy) ? H_sitofp_i64_f32
+ : H_sitofp_i64_f64,
Dest, MaxSrcs);
// TODO: Call the correct compiler-rt helper function.
Call->addArg(Inst->getSrc(0));
assert(Src0RM->getType() != IceType_i64);
T_1 = makeReg(IceType_i32);
}
- Variable *T_2 = makeReg(Dest->getType());
+ Variable *T_2 = makeReg(DestTy);
if (Src0RM->getType() == T_1->getType())
_mov(T_1, Src0RM);
else
case InstCast::Uitofp: {
Operand *Src0 = Inst->getSrc(0);
if (isVectorType(Src0->getType())) {
- assert(Dest->getType() == IceType_v4f32 &&
- Src0->getType() == IceType_v4i32);
+ assert(DestTy == IceType_v4f32 && Src0->getType() == IceType_v4i32);
constexpr SizeT MaxSrcs = 1;
InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
Call->addArg(Src0);
// Use a helper for x86-32 and x86-64. Also use a helper for i32 on
// x86-32.
constexpr SizeT MaxSrcs = 1;
- Type DestType = Dest->getType();
IceString TargetString;
if (isInt32Asserting32Or64(Src0->getType())) {
- TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32
- : H_uitofp_i32_f64;
+ TargetString = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i32_f32
+ : H_uitofp_i32_f64;
} else {
- TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32
- : H_uitofp_i64_f64;
+ TargetString = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i64_f32
+ : H_uitofp_i64_f64;
}
InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
Call->addArg(Src0);
assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32);
T_1 = makeReg(IceType_i32);
}
- Variable *T_2 = makeReg(Dest->getType());
+ Variable *T_2 = makeReg(DestTy);
if (Src0RM->getType() == T_1->getType())
_mov(T_1, Src0RM);
else
}
case InstCast::Bitcast: {
Operand *Src0 = Inst->getSrc(0);
- if (Dest->getType() == Src0->getType()) {
+ if (DestTy == Src0->getType()) {
InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
lowerAssign(Assign);
return;
}
- switch (Dest->getType()) {
+ switch (DestTy) {
default:
llvm_unreachable("Unexpected Bitcast dest type");
case IceType_i8: {
case IceType_i32:
case IceType_f32: {
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
- Type DestType = Dest->getType();
Type SrcType = Src0RM->getType();
- (void)DestType;
- assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
- (DestType == IceType_f32 && SrcType == IceType_i32));
+ assert((DestTy == IceType_i32 && SrcType == IceType_f32) ||
+ (DestTy == IceType_f32 && SrcType == IceType_i32));
// a.i32 = bitcast b.f32 ==>
// t.f32 = b.f32
// s.f32 = spill t.f32
} else {
Src0 = legalize(Src0);
if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
- Variable *T = Func->makeVariable(Dest->getType());
+ Variable *T = Func->makeVariable(DestTy);
_movq(T, Src0);
_movq(Dest, T);
break;
legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
Variable *T = makeReg(Ty);
_movp(T, SourceVectRM);
- if (Ty == IceType_v4f32)
+ if (Ty == IceType_v4f32) {
_insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
- else
- // TODO(stichnot): For the pinsrb and pinsrw instructions, when the source
- // operand is a register, it must be a full r32 register like eax, and not
- // ax/al/ah. For filetype=asm, InstX86Pinsr<Machine>::emit() compensates
- // for the use of r16 and r8 by converting them through getBaseReg(),
- // while emitIAS() validates that the original and base register encodings
- // are the same. But for an "interior" register like ah, it should
- // probably be copied into an r32 via movzx so that the types work out.
+ } else {
+ // For the pinsrb and pinsrw instructions, when the source operand is a
+ // register, it must be a full r32 register like eax, and not ax/al/ah.
+ // For filetype=asm, InstX86Pinsr<Machine>::emit() compensates for the use
+ // of r16 and r8 by converting them through getBaseReg(), while emitIAS()
+ // validates that the original and base register encodings are the same.
+ if (ElementRM->getType() == IceType_i8 &&
+ llvm::isa<Variable>(ElementRM)) {
+ // Don't use ah/bh/ch/dh for pinsrb.
+ ElementRM = copyToReg8(ElementRM);
+ }
_pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
+ }
_movp(Inst->getDest(), T);
} else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
// Use shufps or movss.
return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset);
}
+/// Lowering helper to copy a scalar integer source operand into some 8-bit GPR.
+/// Src is assumed to already be legalized.  If the source operand is known to
+/// be a memory or immediate operand, a simple mov will suffice.  But if the
+/// source operand can be a physical register, then it must first be copied into
+/// a physical register that is truncable to 8-bit, then truncated into a
+/// physical register that can receive a truncation, and finally copied into the
+/// result 8-bit register (which in general can be any 8-bit register).  For
+/// example, moving %ebp into %ah may be accomplished as:
+///   movl %ebp, %edx
+///   mov_trunc %edx, %dl  // this redundant assignment is ultimately elided
+///   movb %dl, %ah
+/// On the other hand, moving a memory or immediate operand into ah:
+///   movb 4(%ebp), %ah
+///   movb $my_imm, %ah
+///
+/// Note #1.  On a 64-bit target, the "movb 4(%ebp), %ah" is likely not
+/// encodable, so RegNum=Reg_ah should NOT be given as an argument.  Instead,
+/// use RegNum=NoRegister and then let the caller do a separate copy into
+/// Reg_ah.
+///
+/// Note #2.  ConstantRelocatable operands are also put through this process
+/// (not truncated directly) because our ELF emitter does R_386_32 relocations
+/// but not R_386_8 relocations.
+///
+/// Note #3.  If Src is a Variable, the result will be an infinite-weight i8
+/// Variable with the RCX86_IsTrunc8Rcvr register class.  As such, this helper
+/// is a convenient way to prevent ah/bh/ch/dh from being an (invalid) argument
+/// to the pinsrb instruction.
+template <class Machine>
+Variable *TargetX86Base<Machine>::copyToReg8(Operand *Src, int32_t RegNum) {
+  Type Ty = Src->getType();
+  assert(isScalarIntegerType(Ty));
+  assert(Ty != IceType_i1);
+  // The result is constrained to the 8-bit GPRs that can legally receive a
+  // truncation (RCX86_IsTrunc8Rcvr); see Note #3 above.
+  Variable *Reg = makeReg(IceType_i8, RegNum);
+  Reg->setRegClass(RCX86_IsTrunc8Rcvr);
+  if (llvm::isa<Variable>(Src) || llvm::isa<ConstantRelocatable>(Src)) {
+    // A Variable may be allocated a physical register that is not trivially
+    // truncable to 8-bit, and a ConstantRelocatable can't be truncated
+    // directly (Note #2), so go through the two-step copy/mov_trunc sequence
+    // described in the header comment: constrain the intermediate copy to a
+    // register class that is trivially truncable for Src's width.
+    Variable *SrcTruncable = makeReg(Ty);
+    switch (Ty) {
+    case IceType_i64:
+      SrcTruncable->setRegClass(RCX86_Is64To8);
+      break;
+    case IceType_i32:
+      SrcTruncable->setRegClass(RCX86_Is32To8);
+      break;
+    case IceType_i16:
+      SrcTruncable->setRegClass(RCX86_Is16To8);
+      break;
+    default:
+      // i8 - just use default register class; no truncation constraint needed.
+      break;
+    }
+    // SrcRcvr receives the truncation, so it carries the same constraint as
+    // the final result register.
+    Variable *SrcRcvr = makeReg(IceType_i8);
+    SrcRcvr->setRegClass(RCX86_IsTrunc8Rcvr);
+    _mov(SrcTruncable, Src);
+    _mov(SrcRcvr, SrcTruncable);
+    Src = SrcRcvr;
+  }
+  _mov(Reg, Src);
+  return Reg;
+}
+
/// Helper for legalize() to emit the right code to lower an operand to a
/// register of the appropriate type.
template <class Machine>
--- /dev/null
+//===- subzero/src/IceTargetLoweringX86RegClass.h - x86 reg class -*- C++ -*-=//
+//
+// The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares the X86 register class extensions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SUBZERO_SRC_ICETARGETLOWERINGX86REGCLASS_H
+#define SUBZERO_SRC_ICETARGETLOWERINGX86REGCLASS_H
+
+#include "IceOperand.h" // RC_Target
+
+namespace Ice {
+namespace X86Internal {
+
+// Extend enum RegClass with x86-specific register classes.  Numbering starts
+// at RC_Target, the first enumerator reserved for target-specific classes, so
+// these values can be used anywhere a RegClass is expected.
+enum RegClassX86 : uint8_t {
+  RCX86_Is64To8 = RC_Target, // 64-bit GPR trivially truncable to 8-bit
+  RCX86_Is32To8,             // 32-bit GPR trivially truncable to 8-bit
+  RCX86_Is16To8,             // 16-bit GPR trivially truncable to 8-bit
+  RCX86_IsTrunc8Rcvr,        // 8-bit GPR that can receive a trunc operation
+  RCX86_IsAhRcvr,            // 8-bit GPR that can be a mov dest from %ah
+  RCX86_NUM
+};
+
+} // end of namespace X86Internal
+} // end of namespace Ice
+
+#endif // SUBZERO_SRC_ICETARGETLOWERINGX86REGCLASS_H
const struct TypeAttributeFields TypeAttributes[] = {
#define X(tag, sizeLog2, align, elts, elty, str) \
- { sizeLog2, align, elts, elty, str } \
+ { sizeLog2, align, elts, IceType_##elty, str } \
,
ICETYPE_TABLE
#undef X
CompareResult) \
{ \
IsVec, IsInt, IsInt & !IsVec, IsInt & IsVec, IsIntArith, IsFloat, \
- IsFloat & !IsVec, IsFloat & IsVec, IsLoadStore, IsParam, CompareResult \
+ IsFloat & !IsVec, IsFloat & IsVec, IsLoadStore, IsParam, \
+ IceType_##CompareResult \
} \
,
ICETYPE_PROPS_TABLE
//#define X(tag, str, is_elf64, e_machine, e_flags)
#define ICETYPE_TABLE \
- /* enum value, log_2(size), align, # elts, element type, printable */ \
- /* string (size and alignment in bytes) */ \
- X(IceType_void, -1, 0, 1, IceType_void, "void") \
- X(IceType_i1, 0, 1, 1, IceType_i1, "i1") \
- X(IceType_i8, 0, 1, 1, IceType_i8, "i8") \
- X(IceType_i16, 1, 1, 1, IceType_i16, "i16") \
- X(IceType_i32, 2, 1, 1, IceType_i32, "i32") \
- X(IceType_i64, 3, 1, 1, IceType_i64, "i64") \
- X(IceType_f32, 2, 4, 1, IceType_f32, "float") \
- X(IceType_f64, 3, 8, 1, IceType_f64, "double") \
- X(IceType_v4i1, 4, 1, 4, IceType_i1, "<4 x i1>") \
- X(IceType_v8i1, 4, 1, 8, IceType_i1, "<8 x i1>") \
- X(IceType_v16i1, 4, 1, 16, IceType_i1, "<16 x i1>") \
- X(IceType_v16i8, 4, 1, 16, IceType_i8, "<16 x i8>") \
- X(IceType_v8i16, 4, 2, 8, IceType_i16, "<8 x i16>") \
- X(IceType_v4i32, 4, 4, 4, IceType_i32, "<4 x i32>") \
- X(IceType_v4f32, 4, 4, 4, IceType_f32, "<4 x float>") \
+ /* enum value, log_2(size), align, # elts, element type, */ \
+ /* printable string (size and alignment in bytes) */ \
+ X(void, -1, 0, 1, void, "void") \
+ X(i1, 0, 1, 1, i1, "i1") \
+ X(i8, 0, 1, 1, i8, "i8") \
+ X(i16, 1, 1, 1, i16, "i16") \
+ X(i32, 2, 1, 1, i32, "i32") \
+ X(i64, 3, 1, 1, i64, "i64") \
+ X(f32, 2, 4, 1, f32, "float") \
+ X(f64, 3, 8, 1, f64, "double") \
+ X(v4i1, 4, 1, 4, i1, "<4 x i1>") \
+ X(v8i1, 4, 1, 8, i1, "<8 x i1>") \
+ X(v16i1, 4, 1, 16, i1, "<16 x i1>") \
+ X(v16i8, 4, 1, 16, i8, "<16 x i8>") \
+ X(v8i16, 4, 2, 8, i16, "<8 x i16>") \
+ X(v4i32, 4, 4, 4, i32, "<4 x i32>") \
+ X(v4f32, 4, 4, 4, f32, "<4 x float>") \
//#define X(tag, sizeLog2, align, elts, elty, str)
// Dictionary:
// CR - Result type of compare instruction for argument type
// (IceType_void if disallowed)
#define ICETYPE_PROPS_TABLE \
- /* Enum Value V I F IA LS P CR */ \
- X(IceType_void, 0, 0, 0, 0, 0, 0, IceType_void) \
- X(IceType_i1, 0, 1, 0, 0, 0, 0, IceType_i1) \
- X(IceType_i8, 0, 1, 0, 1, 1, 0, IceType_i1) \
- X(IceType_i16, 0, 1, 0, 1, 1, 0, IceType_i1) \
- X(IceType_i32, 0, 1, 0, 1, 1, 1, IceType_i1) \
- X(IceType_i64, 0, 1, 0, 1, 1, 1, IceType_i1) \
- X(IceType_f32, 0, 0, 1, 0, 1, 1, IceType_i1) \
- X(IceType_f64, 0, 0, 1, 0, 1, 1, IceType_i1) \
- X(IceType_v4i1, 1, 1, 0, 0, 0, 1, IceType_v4i1) \
- X(IceType_v8i1, 1, 1, 0, 0, 0, 1, IceType_v8i1) \
- X(IceType_v16i1, 1, 1, 0, 0, 0, 1, IceType_v16i1) \
- X(IceType_v16i8, 1, 1, 0, 1, 1, 1, IceType_v16i1) \
- X(IceType_v8i16, 1, 1, 0, 1, 1, 1, IceType_v8i1) \
- X(IceType_v4i32, 1, 1, 0, 1, 1, 1, IceType_v4i1) \
- X(IceType_v4f32, 1, 0, 1, 0, 1, 1, IceType_v4i1) \
+ /* Enum Value V I F IA LS P CR */ \
+ X(void, 0, 0, 0, 0, 0, 0, void) \
+ X(i1, 0, 1, 0, 0, 0, 0, i1) \
+ X(i8, 0, 1, 0, 1, 1, 0, i1) \
+ X(i16, 0, 1, 0, 1, 1, 0, i1) \
+ X(i32, 0, 1, 0, 1, 1, 1, i1) \
+ X(i64, 0, 1, 0, 1, 1, 1, i1) \
+ X(f32, 0, 0, 1, 0, 1, 1, i1) \
+ X(f64, 0, 0, 1, 0, 1, 1, i1) \
+ X(v4i1, 1, 1, 0, 0, 0, 1, v4i1) \
+ X(v8i1, 1, 1, 0, 0, 0, 1, v8i1) \
+ X(v16i1, 1, 1, 0, 0, 0, 1, v16i1) \
+ X(v16i8, 1, 1, 0, 1, 1, 1, v16i1) \
+ X(v8i16, 1, 1, 0, 1, 1, 1, v8i1) \
+ X(v4i32, 1, 1, 0, 1, 1, 1, v4i1) \
+ X(v4f32, 1, 0, 1, 0, 1, 1, v4i1) \
//#define X(tag, IsVec, IsInt, IsFloat, IsIntArith, IsLoadStore, IsParam, \
// CompareResult)
namespace Ice {
enum Type {
-#define X(tag, sizeLog2, align, elts, elty, str) tag,
+#define X(tag, sizeLog2, align, elts, elty, str) IceType_##tag,
ICETYPE_TABLE
#undef X
IceType_NUM
; CHECK-NEXT: movsx eax,ax
;
; OPTM1-LABEL: trunc64To16Signed
-; OPTM1: mov eax,DWORD PTR [esp+
+; OPTM1: mov ax,WORD PTR [esp+
; OPTM1: movsx eax,
; ARM32-LABEL: trunc64To16Signed
; CHECK-NEXT: movzx eax,ax
;
; OPTM1-LABEL: trunc64To16Unsigned
-; OPTM1: mov eax,DWORD PTR [esp+
+; OPTM1: mov ax,WORD PTR [esp+
; OPTM1: movzx eax,
; ARM32-LABEL: trunc64To16Unsigned
}
; CHECK-LABEL: trunc64To1
; CHECK: mov eax,DWORD PTR [esp+0x4]
-; CHECK: and eax,0x1
+; CHECK: and al,0x1
; CHECK-NOT: and eax,0x1
;
; OPTM1-LABEL: trunc64To1
; OPTM1: mov eax,DWORD PTR [esp+
-; OPTM1: and eax,0x1
+; OPTM1: and al,0x1
; OPTM1-NOT: and eax,0x1
; ARM32-LABEL: trunc64To1
; CHECK: sub esp,0x80
; CHECK: mov DWORD PTR [ebp-0x4],esp
; CHECK: mov eax,DWORD PTR [ebp+0xc]
-; CHECK: mov DWORD PTR [ebp-0x8],eax
+; CHECK: mov BYTE PTR [ebp-0x8],al
; CHECK: movzx eax,BYTE PTR [ebp-0x8]
; CHECK: mov DWORD PTR [ebp-0xc],eax
; CHECK: sub esp,0x10
ret i32 %old_ext
}
; CHECK-LABEL: test_atomic_cmpxchg_16
-; CHECK: mov eax,{{.*}}
+; CHECK: mov {{ax|eax}},{{.*}}
; CHECK: lock cmpxchg WORD PTR [e{{[^a].}}],{{[^a]}}x
; ARM32-LABEL: test_atomic_cmpxchg_16
; ARM32: dmb
; OPTM1_1-NEXT: movups XMMWORD PTR [esp+0x20],xmm0
; OPTM1_1-NEXT: movups XMMWORD PTR [esp+0x10],xmm1
; OPTM1_1-NEXT: movups xmm0,XMMWORD PTR [esp+0x20]
-; OPTM1_1-NEXT: pshufd xmm1,XMMWORD PTR [esp+0x20],0x31
+; OPTM1_1-NEXT: pshufd xmm6,XMMWORD PTR [esp+0x20],0x31
; OPTM1_1-NEXT: pshufd xmm2,XMMWORD PTR [esp+0x10],0x31
; OPTM1_1-NEXT: pmuludq xmm0,XMMWORD PTR [esp+0x10]
-; OPTM1_1-NEXT: pmuludq xmm1,xmm2
-; OPTM1_1-NEXT: shufps xmm0,xmm1,0x88
+; OPTM1_1-NEXT: pmuludq xmm6,xmm2
+; OPTM1_1-NEXT: shufps xmm0,xmm6,0x88
; OPTM1_1-NEXT: pshufd xmm0,xmm0,0xd8
; OPTM1_1-NEXT: movups XMMWORD PTR [esp],xmm0
; OPTM1_1-NEXT: movups xmm0,XMMWORD PTR [esp]
; CHECK_1-LABEL: mul_v4i32
; CHECK_1: movups xmm7,xmm0
; CHECK_1-NEXT: pshufd xmm0,xmm0,0x31
-; CHECK_1-NEXT: pshufd xmm4,xmm1,0x31
+; CHECK_1-NEXT: pshufd xmm5,xmm1,0x31
; CHECK_1-NEXT: pmuludq xmm7,xmm1
-; CHECK_1-NEXT: pmuludq xmm0,xmm4
+; CHECK_1-NEXT: pmuludq xmm0,xmm5
; CHECK_1-NEXT: shufps xmm7,xmm0,0x88
; CHECK_1-NEXT: pshufd xmm7,xmm7,0xd8
; CHECK_1-NEXT: movups xmm0,xmm7
; OPTM1_123-NEXT: movups XMMWORD PTR [esp+0x20],xmm0
; OPTM1_123-NEXT: movups XMMWORD PTR [esp+0x10],xmm1
; OPTM1_123-NEXT: movups xmm0,XMMWORD PTR [esp+0x20]
-; OPTM1_123-NEXT: pshufd xmm3,XMMWORD PTR [esp+0x20],0x31
-; OPTM1_123-NEXT: pshufd xmm7,XMMWORD PTR [esp+0x10],0x31
+; OPTM1_123-NEXT: pshufd xmm6,XMMWORD PTR [esp+0x20],0x31
+; OPTM1_123-NEXT: pshufd xmm2,XMMWORD PTR [esp+0x10],0x31
; OPTM1_123-NEXT: pmuludq xmm0,XMMWORD PTR [esp+0x10]
-; OPTM1_123-NEXT: pmuludq xmm3,xmm7
-; OPTM1_123-NEXT: shufps xmm0,xmm3,0x88
+; OPTM1_123-NEXT: pmuludq xmm6,xmm2
+; OPTM1_123-NEXT: shufps xmm0,xmm6,0x88
; OPTM1_123-NEXT: pshufd xmm0,xmm0,0xd8
; OPTM1_123-NEXT: movups XMMWORD PTR [esp],xmm0
; OPTM1_123-NEXT: movups xmm0,XMMWORD PTR [esp]
; OPTM1_123-NEXT: ret
; CHECK_123-LABEL: mul_v4i32
-; CHECK_123: movups xmm4,xmm0
+; CHECK_123: movups xmm5,xmm0
; CHECK_123-NEXT: pshufd xmm0,xmm0,0x31
; CHECK_123-NEXT: pshufd xmm7,xmm1,0x31
-; CHECK_123-NEXT: pmuludq xmm4,xmm1
+; CHECK_123-NEXT: pmuludq xmm5,xmm1
; CHECK_123-NEXT: pmuludq xmm0,xmm7
-; CHECK_123-NEXT: shufps xmm4,xmm0,0x88
-; CHECK_123-NEXT: pshufd xmm4,xmm4,0xd8
-; CHECK_123-NEXT: movups xmm0,xmm4
+; CHECK_123-NEXT: shufps xmm5,xmm0,0x88
+; CHECK_123-NEXT: pshufd xmm5,xmm5,0xd8
+; CHECK_123-NEXT: movups xmm0,xmm5
; CHECK_123-NEXT: ret
}
ret <4 x i32> %res
; REGALLOC-LABEL: func4
-; REGALLOC: movups xmm5,xmm0
+; REGALLOC: movups xmm3,xmm0
; REGALLOC-NEXT: pshufd xmm0,xmm0,0x31
; REGALLOC-NEXT: pshufd xmm4,xmm1,0x31
-; REGALLOC-NEXT: pmuludq xmm5,xmm1
+; REGALLOC-NEXT: pmuludq xmm3,xmm1
; REGALLOC-NEXT: pmuludq xmm0,xmm4
-; REGALLOC-NEXT: shufps xmm5,xmm0,0x88
-; REGALLOC-NEXT: pshufd xmm5,xmm5,0xd8
-; REGALLOC-NEXT: movups xmm0,xmm5
+; REGALLOC-NEXT: shufps xmm3,xmm0,0x88
+; REGALLOC-NEXT: pshufd xmm3,xmm3,0xd8
+; REGALLOC-NEXT: movups xmm0,xmm3
; REGALLOC-NEXT: ret
}