// instructions to run at the double precision rate for the device so it's
// probably best to just report no single precision denormals.
static uint32_t getFPMode(const MachineFunction &F) {
- const AMDGPUSubtarget& ST = F.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget& ST = F.getSubtarget<SISubtarget>();
// TODO: Is there any real use for the flush in only / flush out only modes?
uint32_t FP32Denormals =
void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
unsigned MaxGPR = 0;
bool killPixel = false;
- const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
- const R600RegisterInfo *RI =
- static_cast<const R600RegisterInfo *>(STM.getRegisterInfo());
+ const R600Subtarget &STM = MF.getSubtarget<R600Subtarget>();
+ const R600RegisterInfo *RI = STM.getRegisterInfo();
const R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
for (const MachineBasicBlock &MBB : MF) {
}
unsigned RsrcReg;
- if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) {
+ if (STM.getGeneration() >= R600Subtarget::EVERGREEN) {
// Evergreen / Northern Islands
switch (MF.getFunction()->getCallingConv()) {
default: // Fall through
void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
const MachineFunction &MF) const {
- const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
uint64_t CodeSize = 0;
unsigned MaxSGPR = 0;
unsigned MaxVGPR = 0;
bool VCCUsed = false;
bool FlatUsed = false;
- const SIRegisterInfo *RI =
- static_cast<const SIRegisterInfo *>(STM.getRegisterInfo());
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(STM.getInstrInfo());
+ const SIRegisterInfo *RI = STM.getRegisterInfo();
+ const SIInstrInfo *TII = STM.getInstrInfo();
for (const MachineBasicBlock &MBB : MF) {
for (const MachineInstr &MI : MBB) {
if (VCCUsed)
ExtraSGPRs = 2;
- if (STM.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (STM.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) {
if (FlatUsed)
ExtraSGPRs = 4;
} else {
ProgInfo.NumSGPR = MaxSGPR + 1;
if (STM.hasSGPRInitBug()) {
- if (ProgInfo.NumSGPR > AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG) {
+ if (ProgInfo.NumSGPR > SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG) {
LLVMContext &Ctx = MF.getFunction()->getContext();
DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
"SGPRs with SGPR init bug",
Ctx.diagnose(Diag);
}
- ProgInfo.NumSGPR = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+ ProgInfo.NumSGPR = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
}
if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) {
ProgInfo.CodeLen = CodeSize;
unsigned LDSAlignShift;
- if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
+ if (STM.getGeneration() < SISubtarget::SEA_ISLANDS) {
// LDS is allocated in 64 dword blocks.
LDSAlignShift = 8;
} else {
void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
const SIProgramInfo &KernelInfo) {
- const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned RsrcReg = getRsrcReg(MF.getFunction()->getCallingConv());
void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
const SIProgramInfo &KernelInfo) const {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
amd_kernel_code_t header;
AMDGPU::initDefaultAMDKernelCodeT(header, STM.getFeatureBits());
//===----------------------------------------------------------------------===//
#include "AMDGPUFrameLowering.h"
#include "AMDGPURegisterInfo.h"
-#include "R600MachineFunctionInfo.h"
+#include "AMDGPUSubtarget.h"
+
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Instructions.h"
int FI,
unsigned &FrameReg) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+ const AMDGPURegisterInfo *RI
+ = MF.getSubtarget<AMDGPUSubtarget>().getRegisterInfo();
// Fill in FrameReg output argument.
FrameReg = RI->getFrameRegister(MF);
return OffsetBytes / (getStackWidth(MF) * 4);
}
-const TargetFrameLowering::SpillSlot *
-AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
- NumEntries = 0;
- return nullptr;
-}
-void AMDGPUFrameLowering::emitPrologue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {}
-void
-AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
-}
-
-bool
-AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
- return false;
-}
/// \returns The number of 32-bit sub-registers that are used when storing
/// values to the stack.
unsigned getStackWidth(const MachineFunction &MF) const;
+
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const override;
- const SpillSlot *
- getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
- void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- bool hasFP(const MachineFunction &MF) const override;
+
+ const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const override {
+ NumEntries = 0;
+ return nullptr;
+ }
+
+ bool hasFP(const MachineFunction &MF) const override {
+ return false;
+ }
};
} // namespace llvm
#endif
: SelectionDAGISel(TM) {}
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
+ Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
}
-AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
+AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
const AMDGPUSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
// Lower floating point store/load to integer store/load to reduce the number
const SmallVectorImpl<ISD::OutputArg> &Outs) const;
public:
- AMDGPUTargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI);
+ AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
bool isFAbsFree(EVT VT) const override;
bool isFNegFree(EVT VT) const override;
// Pin the vtable to this file.
void AMDGPUInstrInfo::anchor() {}
-AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUGenInstrInfo(-1, -1), ST(st) {}
-
-const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
- return RI;
-}
+AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
+ : AMDGPUGenInstrInfo(-1, -1), ST(ST) {}
bool AMDGPUInstrInfo::enableClusterLoads() const {
return true;
return -1;
}
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUFrameLowering *TFL = ST.getFrameLowering();
+
unsigned IgnoredFrameReg;
- Offset = MF.getSubtarget().getFrameLowering()->getFrameIndexReference(
- MF, -1, IgnoredFrameReg);
+ Offset = TFL->getFrameIndexReference(MF, -1, IgnoredFrameReg);
return getIndirectIndexBegin(MF) + Offset;
}
}
}
+// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
+enum SIEncodingFamily {
+ SI = 0,
+ VI = 1
+};
+
// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
namespace llvm {
namespace AMDGPU {
static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
- return getMCOpcodeGen(Opcode, (enum Subtarget)Gen);
+ return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}
}
}
-// This must be kept in sync with the SISubtarget class in SIInstrInfo.td
-enum SISubtarget {
- SI = 0,
- VI = 1
-};
-
-static enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) {
- switch (Gen) {
- default:
- return SI;
+static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
+ switch (ST.getGeneration()) {
+ case AMDGPUSubtarget::SOUTHERN_ISLANDS:
+ case AMDGPUSubtarget::SEA_ISLANDS:
+ return SIEncodingFamily::SI;
case AMDGPUSubtarget::VOLCANIC_ISLANDS:
- return VI;
+ return SIEncodingFamily::VI;
+
+ // FIXME: This should never be called for r600 GPUs.
+ case AMDGPUSubtarget::R600:
+ case AMDGPUSubtarget::R700:
+ case AMDGPUSubtarget::EVERGREEN:
+ case AMDGPUSubtarget::NORTHERN_ISLANDS:
+ return SIEncodingFamily::SI;
}
}
int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
- int MCOp = AMDGPU::getMCOpcode(
- Opcode, AMDGPUSubtargetToSISubtarget(ST.getGeneration()));
+ int MCOp = AMDGPU::getMCOpcode(Opcode, subtargetEncodingFamily(ST));
// -1 means that Opcode is already a native instruction.
if (MCOp == -1)
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
-#include "AMDGPURegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
private:
- const AMDGPURegisterInfo RI;
- virtual void anchor();
-protected:
const AMDGPUSubtarget &ST;
-public:
- explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
- virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
+ virtual void anchor();
public:
+ explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
+
/// \returns the smallest register index that will be accessed by an indirect
/// read or write or -1 if indirect addressing is not used by this program.
int getIndirectIndexBegin(const MachineFunction &MF) const;
/// \brief Given a MIMG \p Opcode that writes all 4 channels, return the
/// equivalent opcode that writes \p Channels Channels.
int getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const;
-
};
namespace AMDGPU {
#include "AMDGPUMCInstLower.h"
#include "AMDGPUAsmPrinter.h"
+#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "InstPrinter/AMDGPUInstPrinter.h"
#include "SIInstrInfo.h"
using namespace llvm;
AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st):
- Ctx(ctx), ST(st)
-{ }
+ Ctx(ctx), ST(st) { }
void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
raw_string_ostream DisasmStream(DisasmLine);
AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(),
- *MF->getSubtarget().getInstrInfo(),
- *MF->getSubtarget().getRegisterInfo());
- InstPrinter.printInst(&TmpInst, DisasmStream, StringRef(),
- MF->getSubtarget());
+ *STI.getInstrInfo(),
+ *STI.getRegisterInfo());
+ InstPrinter.printInst(&TmpInst, DisasmStream, StringRef(), STI);
// Disassemble instruction/operands to hex representation.
SmallVector<MCFixup, 4> Fixups;
// they are not supported at this time.
//===----------------------------------------------------------------------===//
-const MCPhysReg AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister;
+// Dummy to not crash RegisterClassInfo.
+static const MCPhysReg CalleeSavedReg = AMDGPU::NoRegister;
-const MCPhysReg*
-AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+const MCPhysReg *AMDGPURegisterInfo::getCalleeSavedRegs(
+ const MachineFunction *) const {
return &CalleeSavedReg;
}
}
unsigned AMDGPURegisterInfo::getIndirectSubReg(unsigned IndirectIndex) const {
-
return getSubRegFromChannel(IndirectIndex);
}
class TargetInstrInfo;
struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
- static const MCPhysReg CalleeSavedReg;
-
AMDGPURegisterInfo();
- BitVector getReservedRegs(const MachineFunction &MF) const override {
- assert(!"Unimplemented"); return BitVector();
- }
-
- virtual unsigned getHWRegIndex(unsigned Reg) const {
- assert(!"Unimplemented"); return 0;
- }
-
/// \returns the sub reg enum value for the given \p Channel
/// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
unsigned getSubRegFromChannel(unsigned Channel) const;
unsigned getFrameRegister(const MachineFunction &MF) const override;
unsigned getIndirectSubReg(unsigned IndirectIndex) const;
-
};
} // End namespace llvm
} // End anonymous namespace.
#endif
+AMDGPUSubtarget::~AMDGPUSubtarget() {}
+
AMDGPUSubtarget &
AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS) {
}
AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
- TargetMachine &TM)
- : AMDGPUGenSubtargetInfo(TT, GPU, FS),
- DumpCode(false), R600ALUInst(false), HasVertexCache(false),
- TexVTXClauseSize(0),
- Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
- FP64(false),
- FP64Denormals(false), FP32Denormals(false), FPExceptions(false),
- FastFMAF32(false), HalfRate64Ops(false), CaymanISA(false),
- FlatAddressSpace(false), FlatForGlobal(false), EnableIRStructurizer(true),
- EnablePromoteAlloca(false),
- EnableIfCvt(true), EnableLoadStoreOpt(false),
- EnableUnsafeDSOffsetFolding(false),
- EnableXNACK(false),
- WavefrontSize(64), CFALUBug(false),
- LocalMemorySize(0), MaxPrivateElementSize(0),
- EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
- GCN1Encoding(false), GCN3Encoding(false), CIInsts(false),
- HasSMemRealTime(false), Has16BitInsts(false),
- LDSBankCount(0),
- IsaVersion(ISAVersion0_0_0),
- EnableSIScheduler(false),
- DebuggerInsertNops(false), DebuggerReserveRegs(false),
- FrameLowering(nullptr),
- GISel(),
- InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) {
-
+ const TargetMachine &TM)
+ : AMDGPUGenSubtargetInfo(TT, GPU, FS),
+ TargetTriple(TT),
+ Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
+ IsaVersion(ISAVersion0_0_0),
+ WavefrontSize(64),
+ LocalMemorySize(0),
+ LDSBankCount(0),
+ MaxPrivateElementSize(0),
+
+ FastFMAF32(false),
+ HalfRate64Ops(false),
+
+ FP32Denormals(false),
+ FP64Denormals(false),
+ FPExceptions(false),
+ FlatForGlobal(false),
+ EnableXNACK(false),
+ DebuggerInsertNops(false),
+ DebuggerReserveRegs(false),
+
+ EnableVGPRSpilling(false),
+ EnableIRStructurizer(true),
+ EnablePromoteAlloca(false),
+ EnableIfCvt(true),
+ EnableLoadStoreOpt(false),
+ EnableUnsafeDSOffsetFolding(false),
+ EnableSIScheduler(false),
+ DumpCode(false),
+
+ FP64(false),
+ IsGCN(false),
+ GCN1Encoding(false),
+ GCN3Encoding(false),
+ CIInsts(false),
+ SGPRInitBug(false),
+ HasSMemRealTime(false),
+ Has16BitInsts(false),
+ FlatAddressSpace(false),
+
+ R600ALUInst(false),
+ CaymanISA(false),
+ CFALUBug(false),
+ HasVertexCache(false),
+ TexVTXClauseSize(0),
+
+ FeatureDisable(false),
+
+ InstrItins(getInstrItineraryForCPU(GPU)) {
initializeSubtargetDependencies(TT, GPU, FS);
-
- // Scratch is allocated in 256 dword per wave blocks.
- const unsigned StackAlign = 4 * 256 / getWavefrontSize();
-
- if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- InstrInfo.reset(new R600InstrInfo(*this));
- TLInfo.reset(new R600TargetLowering(TM, *this));
-
- // FIXME: Should have R600 specific FrameLowering
- FrameLowering.reset(new AMDGPUFrameLowering(
- TargetFrameLowering::StackGrowsUp,
- StackAlign,
- 0));
- } else {
- InstrInfo.reset(new SIInstrInfo(*this));
- TLInfo.reset(new SITargetLowering(TM, *this));
- FrameLowering.reset(new SIFrameLowering(
- TargetFrameLowering::StackGrowsUp,
- StackAlign,
- 0));
-#ifndef LLVM_BUILD_GLOBAL_ISEL
- GISelAccessor *GISel = new GISelAccessor();
-#else
- AMDGPUGISelActualAccessor *GISel =
- new AMDGPUGISelActualAccessor();
- GISel->CallLoweringInfo.reset(
- new AMDGPUCallLowering(*getTargetLowering()));
-#endif
- setGISelAccessor(*GISel);
- }
-}
-
-const CallLowering *AMDGPUSubtarget::getCallLowering() const {
- assert(GISel && "Access to GlobalISel APIs not set");
- return GISel->getCallLowering();
-}
-
-unsigned AMDGPUSubtarget::getStackEntrySize() const {
- assert(getGeneration() <= NORTHERN_ISLANDS);
- switch(getWavefrontSize()) {
- case 16:
- return 8;
- case 32:
- return hasCaymanISA() ? 4 : 8;
- case 64:
- return 4;
- default:
- llvm_unreachable("Illegal wavefront size.");
- }
}
// FIXME: These limits are for SI. Did they change with the larger maximum LDS
return 1;
}
-unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const {
- switch(getGeneration()) {
- default: llvm_unreachable("ChipID unknown");
- case SEA_ISLANDS: return 12;
- }
-}
-
-AMDGPU::IsaVersion AMDGPUSubtarget::getIsaVersion() const {
- return AMDGPU::getIsaVersion(getFeatureBits());
+R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
+ const TargetMachine &TM) :
+ AMDGPUSubtarget(TT, GPU, FS, TM),
+ InstrInfo(*this),
+ FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
+ TLInfo(TM, *this) {}
+
+SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
+ const TargetMachine &TM) :
+ AMDGPUSubtarget(TT, GPU, FS, TM),
+ InstrInfo(*this),
+ FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
+ TLInfo(TM, *this) {
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+ GISelAccessor *GISel = new GISelAccessor();
+#else
+ AMDGPUGISelActualAccessor *GISel =
+ new AMDGPUGISelActualAccessor();
+ GISel->CallLoweringInfo.reset(
+ new AMDGPUCallLowering(*getTargetLowering()));
+#endif
+ setGISelAccessor(*GISel);
}
-bool AMDGPUSubtarget::isVGPRSpillingEnabled(const Function& F) const {
- return !AMDGPU::isShader(F.getCallingConv()) || EnableVGPRSpilling;
+unsigned R600Subtarget::getStackEntrySize() const {
+ switch (getWavefrontSize()) {
+ case 16:
+ return 8;
+ case 32:
+ return hasCaymanISA() ? 4 : 8;
+ case 64:
+ return 4;
+ default:
+ llvm_unreachable("Illegal wavefront size.");
+ }
}
-void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
+void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
MachineInstr *begin,
MachineInstr *end,
unsigned NumRegionInstrs) const {
- if (getGeneration() >= SOUTHERN_ISLANDS) {
-
- // Track register pressure so the scheduler can try to decrease
- // pressure once register usage is above the threshold defined by
- // SIRegisterInfo::getRegPressureSetLimit()
- Policy.ShouldTrackPressure = true;
+ // Track register pressure so the scheduler can try to decrease
+ // pressure once register usage is above the threshold defined by
+ // SIRegisterInfo::getRegPressureSetLimit()
+ Policy.ShouldTrackPressure = true;
+
+ // Enabling both top down and bottom up scheduling seems to give us less
+ // register spills than just using one of these approaches on its own.
+ Policy.OnlyTopDown = false;
+ Policy.OnlyBottomUp = false;
+
+ // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
+ if (!enableSIScheduler())
+ Policy.ShouldTrackLaneMasks = true;
+}
- // Enabling both top down and bottom up scheduling seems to give us less
- // register spills than just using one of these approaches on its own.
- Policy.OnlyTopDown = false;
- Policy.OnlyBottomUp = false;
+bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
+ return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
+}
- // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
- if (!enableSIScheduler())
- Policy.ShouldTrackLaneMasks = true;
+unsigned SISubtarget::getAmdKernelCodeChipID() const {
+ switch (getGeneration()) {
+ case SEA_ISLANDS:
+ return 12;
+ default:
+ llvm_unreachable("ChipID unknown");
}
}
+AMDGPU::IsaVersion SISubtarget::getIsaVersion() const {
+ return AMDGPU::getIsaVersion(getFeatureBits());
+}
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
#include "AMDGPU.h"
-#include "AMDGPUFrameLowering.h"
-#include "AMDGPUISelLowering.h"
-#include "AMDGPUInstrInfo.h"
-#include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
+#include "R600ISelLowering.h"
+#include "R600FrameLowering.h"
+#include "SIInstrInfo.h"
+#include "SIISelLowering.h"
+#include "SIFrameLowering.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/Target/TargetSubtargetInfo.h"
namespace llvm {
-class StringRef;
class SIMachineFunctionInfo;
+class StringRef;
class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
-
public:
enum Generation {
R600 = 0,
};
enum {
- FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
- };
-
- enum {
ISAVersion0_0_0,
ISAVersion7_0_0,
ISAVersion7_0_1,
ISAVersion8_0_3
};
-private:
- bool DumpCode;
- bool R600ALUInst;
- bool HasVertexCache;
- short TexVTXClauseSize;
+protected:
+ // Basic subtarget description.
+ Triple TargetTriple;
Generation Gen;
- bool FP64;
- bool FP64Denormals;
- bool FP32Denormals;
- bool FPExceptions;
+ unsigned IsaVersion;
+ unsigned WavefrontSize;
+ int LocalMemorySize;
+ int LDSBankCount;
+ unsigned MaxPrivateElementSize;
+
+ // Possibly statically set by tablegen, but may want to be overridden.
bool FastFMAF32;
bool HalfRate64Ops;
- bool CaymanISA;
- bool FlatAddressSpace;
+
+ // Dynamially set bits that enable features.
+ bool FP32Denormals;
+ bool FP64Denormals;
+ bool FPExceptions;
bool FlatForGlobal;
+ bool EnableXNACK;
+ bool DebuggerInsertNops;
+ bool DebuggerReserveRegs;
+
+ // Used as options.
+ bool EnableVGPRSpilling;
bool EnableIRStructurizer;
bool EnablePromoteAlloca;
bool EnableIfCvt;
bool EnableLoadStoreOpt;
bool EnableUnsafeDSOffsetFolding;
- bool EnableXNACK;
- unsigned WavefrontSize;
- bool CFALUBug;
- int LocalMemorySize;
- unsigned MaxPrivateElementSize;
- bool EnableVGPRSpilling;
- bool SGPRInitBug;
+ bool EnableSIScheduler;
+ bool DumpCode;
+
+ // Subtarget statically properties set by tablegen
+ bool FP64;
bool IsGCN;
bool GCN1Encoding;
bool GCN3Encoding;
bool CIInsts;
+ bool SGPRInitBug;
bool HasSMemRealTime;
bool Has16BitInsts;
+ bool FlatAddressSpace;
+ bool R600ALUInst;
+ bool CaymanISA;
+ bool CFALUBug;
+ bool HasVertexCache;
+ short TexVTXClauseSize;
+
+ // Dummy feature to use for assembler in tablegen.
bool FeatureDisable;
- int LDSBankCount;
- unsigned IsaVersion;
- bool EnableSIScheduler;
- bool DebuggerInsertNops;
- bool DebuggerReserveRegs;
- std::unique_ptr<AMDGPUFrameLowering> FrameLowering;
- std::unique_ptr<AMDGPUTargetLowering> TLInfo;
- std::unique_ptr<AMDGPUInstrInfo> InstrInfo;
- std::unique_ptr<GISelAccessor> GISel;
InstrItineraryData InstrItins;
- Triple TargetTriple;
public:
- AMDGPUSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
- TargetMachine &TM);
+ AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
+ const TargetMachine &TM);
+ virtual ~AMDGPUSubtarget();
AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS);
- void setGISelAccessor(GISelAccessor &GISel) {
- this->GISel.reset(&GISel);
- }
+ const AMDGPUInstrInfo *getInstrInfo() const override;
+ const AMDGPUFrameLowering *getFrameLowering() const override;
+ const AMDGPUTargetLowering *getTargetLowering() const override;
+ const AMDGPURegisterInfo *getRegisterInfo() const override;
- const AMDGPUFrameLowering *getFrameLowering() const override {
- return FrameLowering.get();
- }
- const AMDGPUInstrInfo *getInstrInfo() const override {
- return InstrInfo.get();
- }
- const AMDGPURegisterInfo *getRegisterInfo() const override {
- return &InstrInfo->getRegisterInfo();
- }
- AMDGPUTargetLowering *getTargetLowering() const override {
- return TLInfo.get();
- }
const InstrItineraryData *getInstrItineraryData() const override {
return &InstrItins;
}
- const CallLowering *getCallLowering() const override;
-
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- bool hasVertexCache() const {
- return HasVertexCache;
- }
-
- short getTexVTXClauseSize() const {
- return TexVTXClauseSize;
+ bool isAmdHsaOS() const {
+ return TargetTriple.getOS() == Triple::AMDHSA;
}
Generation getGeneration() const {
return Gen;
}
- bool hasHWFP64() const {
- return FP64;
+ unsigned getWavefrontSize() const {
+ return WavefrontSize;
}
- bool hasCaymanISA() const {
- return CaymanISA;
+ int getLocalMemorySize() const {
+ return LocalMemorySize;
}
- bool hasFP32Denormals() const {
- return FP32Denormals;
+ int getLDSBankCount() const {
+ return LDSBankCount;
}
- bool hasFP64Denormals() const {
- return FP64Denormals;
+ unsigned getMaxPrivateElementSize() const {
+ return MaxPrivateElementSize;
}
- bool hasFPExceptions() const {
- return FPExceptions;
+ bool hasHWFP64() const {
+ return FP64;
}
bool hasFastFMAF32() const {
return HalfRate64Ops;
}
- bool hasFlatAddressSpace() const {
- return FlatAddressSpace;
- }
-
- bool hasSMemRealTime() const {
- return HasSMemRealTime;
- }
-
- bool has16BitInsts() const {
- return Has16BitInsts;
- }
-
- bool useFlatForGlobal() const {
- return FlatForGlobal;
- }
-
bool hasAddr64() const {
return (getGeneration() < VOLCANIC_ISLANDS);
}
return (getGeneration() >= EVERGREEN);
}
+ bool hasCaymanISA() const {
+ return CaymanISA;
+ }
+
bool IsIRStructurizerEnabled() const {
return EnableIRStructurizer;
}
return EnableIfCvt;
}
- bool loadStoreOptEnabled() const {
- return EnableLoadStoreOpt;
- }
-
bool unsafeDSOffsetFoldingEnabled() const {
return EnableUnsafeDSOffsetFolding;
}
- unsigned getWavefrontSize() const {
- return WavefrontSize;
- }
-
- unsigned getStackEntrySize() const;
-
- bool hasCFAluBug() const {
- assert(getGeneration() <= NORTHERN_ISLANDS);
- return CFALUBug;
+ bool dumpCode() const {
+ return DumpCode;
}
/// Return the amount of LDS that can be used that will not restrict the
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const;
- int getLocalMemorySize() const {
- return LocalMemorySize;
+ bool hasFP32Denormals() const {
+ return FP32Denormals;
}
- unsigned getMaxPrivateElementSize() const {
- return MaxPrivateElementSize;
+ bool hasFP64Denormals() const {
+ return FP64Denormals;
}
- bool hasSGPRInitBug() const {
- return SGPRInitBug;
+ bool hasFPExceptions() const {
+ return FPExceptions;
}
- int getLDSBankCount() const {
- return LDSBankCount;
+ bool useFlatForGlobal() const {
+ return FlatForGlobal;
}
- unsigned getAmdKernelCodeChipID() const;
+ bool isXNACKEnabled() const {
+ return EnableXNACK;
+ }
- AMDGPU::IsaVersion getIsaVersion() const;
+ unsigned getMaxWavesPerCU() const {
+ if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ return 10;
+
+ // FIXME: Not sure what this is for other subtagets.
+ return 8;
+ }
+
+ /// \brief Returns the offset in bytes from the start of the input buffer
+ /// of the first explicit kernel argument.
+ unsigned getExplicitKernelArgOffset() const {
+ return isAmdHsaOS() ? 0 : 36;
+ }
+
+ unsigned getStackAlignment() const {
+ // Scratch is allocated in 256 dword per wave blocks.
+ return 4 * 256 / getWavefrontSize();
+ }
bool enableMachineScheduler() const override {
return true;
}
- void overrideSchedPolicy(MachineSchedPolicy &Policy,
- MachineInstr *begin, MachineInstr *end,
- unsigned NumRegionInstrs) const override;
+ bool enableSubRegLiveness() const override {
+ return true;
+ }
+};
- // Helper functions to simplify if statements
- bool isTargetELF() const {
- return false;
+class R600Subtarget final : public AMDGPUSubtarget {
+private:
+ R600InstrInfo InstrInfo;
+ R600FrameLowering FrameLowering;
+ R600TargetLowering TLInfo;
+
+public:
+ R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
+ const TargetMachine &TM);
+
+ const R600InstrInfo *getInstrInfo() const override {
+ return &InstrInfo;
}
- bool enableSIScheduler() const {
- return EnableSIScheduler;
+ const R600FrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
}
- bool debuggerInsertNops() const {
- return DebuggerInsertNops;
+ const R600TargetLowering *getTargetLowering() const override {
+ return &TLInfo;
}
- bool debuggerReserveRegs() const {
- return DebuggerReserveRegs;
+ const R600RegisterInfo *getRegisterInfo() const override {
+ return &InstrInfo.getRegisterInfo();
}
- bool dumpCode() const {
- return DumpCode;
+ bool hasCFAluBug() const {
+ return CFALUBug;
}
- bool r600ALUEncoding() const {
- return R600ALUInst;
+
+ bool hasVertexCache() const {
+ return HasVertexCache;
}
- bool isAmdHsaOS() const {
- return TargetTriple.getOS() == Triple::AMDHSA;
+
+ short getTexVTXClauseSize() const {
+ return TexVTXClauseSize;
}
- bool isVGPRSpillingEnabled(const Function& F) const;
- bool isXNACKEnabled() const {
- return EnableXNACK;
+ unsigned getStackEntrySize() const;
+};
+
+class SISubtarget final : public AMDGPUSubtarget {
+public:
+ enum {
+ FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
+ };
+
+private:
+ SIInstrInfo InstrInfo;
+ SIFrameLowering FrameLowering;
+ SITargetLowering TLInfo;
+ std::unique_ptr<GISelAccessor> GISel;
+
+public:
+ SISubtarget(const Triple &TT, StringRef CPU, StringRef FS,
+ const TargetMachine &TM);
+
+ const SIInstrInfo *getInstrInfo() const override {
+ return &InstrInfo;
}
- unsigned getMaxWavesPerCU() const {
- if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
- return 10;
+ const SIFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
- // FIXME: Not sure what this is for other subtagets.
- return 8;
+ const SITargetLowering *getTargetLowering() const override {
+ return &TLInfo;
}
- bool enableSubRegLiveness() const override {
- return true;
+ const CallLowering *getCallLowering() const override {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getCallLowering();
}
- /// \brief Returns the offset in bytes from the start of the input buffer
- /// of the first explicit kernel argument.
- unsigned getExplicitKernelArgOffset() const {
- return isAmdHsaOS() ? 0 : 36;
+ const SIRegisterInfo *getRegisterInfo() const override {
+ return &InstrInfo.getRegisterInfo();
}
+ void setGISelAccessor(GISelAccessor &GISel) {
+ this->GISel.reset(&GISel);
+ }
+
+ void overrideSchedPolicy(MachineSchedPolicy &Policy,
+ MachineInstr *Begin, MachineInstr *End,
+ unsigned NumRegionInstrs) const override;
+
+ bool isVGPRSpillingEnabled(const Function& F) const;
+
+ unsigned getAmdKernelCodeChipID() const;
+
+ AMDGPU::IsaVersion getIsaVersion() const;
+
unsigned getMaxNumUserSGPRs() const {
return 16;
}
+
+ bool hasFlatAddressSpace() const {
+ return FlatAddressSpace;
+ }
+
+ bool hasSMemRealTime() const {
+ return HasSMemRealTime;
+ }
+
+ bool has16BitInsts() const {
+ return Has16BitInsts;
+ }
+
+ bool enableSIScheduler() const {
+ return EnableSIScheduler;
+ }
+
+ bool debuggerInsertNops() const {
+ return DebuggerInsertNops;
+ }
+
+ bool debuggerReserveRegs() const {
+ return DebuggerReserveRegs;
+ }
+
+ bool loadStoreOptEnabled() const {
+ return EnableLoadStoreOpt;
+ }
+
+ bool hasSGPRInitBug() const {
+ return SGPRInitBug;
+ }
};
+
+inline const AMDGPUInstrInfo *AMDGPUSubtarget::getInstrInfo() const {
+ if (getGeneration() >= SOUTHERN_ISLANDS)
+ return static_cast<const SISubtarget *>(this)->getInstrInfo();
+
+ return static_cast<const R600Subtarget *>(this)->getInstrInfo();
+}
+
+inline const AMDGPUFrameLowering *AMDGPUSubtarget::getFrameLowering() const {
+ if (getGeneration() >= SOUTHERN_ISLANDS)
+ return static_cast<const SISubtarget *>(this)->getFrameLowering();
+
+ return static_cast<const R600Subtarget *>(this)->getFrameLowering();
+}
+
+inline const AMDGPUTargetLowering *AMDGPUSubtarget::getTargetLowering() const {
+ if (getGeneration() >= SOUTHERN_ISLANDS)
+ return static_cast<const SISubtarget *>(this)->getTargetLowering();
+
+ return static_cast<const R600Subtarget *>(this)->getTargetLowering();
+}
+
+inline const AMDGPURegisterInfo *AMDGPUSubtarget::getRegisterInfo() const {
+ if (getGeneration() >= SOUTHERN_ISLANDS)
+ return static_cast<const SISubtarget *>(this)->getRegisterInfo();
+
+ return static_cast<const R600Subtarget *>(this)->getRegisterInfo();
+}
+
} // End namespace llvm
#endif
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
-#include "llvm/CodeGen/Passes.h"
using namespace llvm;
Optional<Reloc::Model> RM,
CodeModel::Model CM,
CodeGenOpt::Level OptLevel)
- : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
- FS, Options, getEffectiveRelocModel(RM), CM, OptLevel),
- TLOF(createTLOF(getTargetTriple())),
- Subtarget(TT, getTargetCPU(), FS, *this), IntrinsicInfo() {
+ : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
+ FS, Options, getEffectiveRelocModel(RM), CM, OptLevel),
+ TLOF(createTLOF(getTargetTriple())),
+ IntrinsicInfo() {
setRequiresStructuredCFG(true);
initAsmInfo();
}
TargetOptions Options,
Optional<Reloc::Model> RM,
CodeModel::Model CM, CodeGenOpt::Level OL)
- : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
+ : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, getTargetCPU(), FS, *this) {}
//===----------------------------------------------------------------------===//
// GCN Target Machine (SI+)
TargetOptions Options,
Optional<Reloc::Model> RM,
CodeModel::Model CM, CodeGenOpt::Level OL)
- : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
+ : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, getTargetCPU(), FS, *this) {}
//===----------------------------------------------------------------------===//
// AMDGPU Pass Setup
return getTM<AMDGPUTargetMachine>();
}
- ScheduleDAGInstrs *
- createMachineScheduler(MachineSchedContext *C) const override {
- const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
- return createR600MachineScheduler(C);
- else if (ST.enableSIScheduler())
- return createSIMachineScheduler(C);
- return nullptr;
- }
-
void addEarlyCSEOrGVNPass();
void addStraightLineScalarOptimizationPasses();
void addIRPasses() override;
R600PassConfig(TargetMachine *TM, PassManagerBase &PM)
: AMDGPUPassConfig(TM, PM) { }
+  // R600 targets unconditionally use the custom R600 machine scheduler.
+  ScheduleDAGInstrs *createMachineScheduler(
+    MachineSchedContext *C) const override {
+    return createR600MachineScheduler(C);
+  }
+
bool addPreISel() override;
void addPreRegAlloc() override;
void addPreSched2() override;
public:
GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
: AMDGPUPassConfig(TM, PM) { }
+
+  // Convenience downcast; a GCNPassConfig is only created for a
+  // GCNTargetMachine.
+  GCNTargetMachine &getGCNTargetMachine() const {
+    return getTM<GCNTargetMachine>();
+  }
+
+  // Use the SI machine scheduler when the subtarget enables it; returning
+  // nullptr selects the default machine scheduler.
+  ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const override {
+    const SISubtarget *ST = getGCNTargetMachine().getSubtargetImpl();
+    if (ST->enableSIScheduler())
+      return createSIMachineScheduler(C);
+    return nullptr;
+  }
+
bool addPreISel() override;
void addMachineSSAOptimization() override;
bool addInstSelector() override;
addEarlyCSEOrGVNPass();
}
-bool
-AMDGPUPassConfig::addPreISel() {
+bool AMDGPUPassConfig::addPreISel() {
addPass(createFlattenCFGPass());
return false;
}
#endif
void GCNPassConfig::addPreRegAlloc() {
- const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
+ const SISubtarget &ST = *getGCNTargetMachine().getSubtargetImpl();
// This needs to be run directly before register allocation because
// earlier passes might recompute live intervals.
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H
-#include "AMDGPUFrameLowering.h"
-#include "AMDGPUInstrInfo.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
-#include "R600ISelLowering.h"
-#include "llvm/IR/DataLayout.h"
namespace llvm {
//===----------------------------------------------------------------------===//
class AMDGPUTargetMachine : public LLVMTargetMachine {
-private:
-
protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- AMDGPUSubtarget Subtarget;
AMDGPUIntrinsicInfo IntrinsicInfo;
public:
CodeGenOpt::Level OL);
~AMDGPUTargetMachine();
- const AMDGPUSubtarget *getSubtargetImpl() const { return &Subtarget; }
- const AMDGPUSubtarget *getSubtargetImpl(const Function &) const override {
- return &Subtarget;
- }
+ const AMDGPUSubtarget *getSubtargetImpl() const;
+ const AMDGPUSubtarget *getSubtargetImpl(const Function &) const override;
+
const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
return &IntrinsicInfo;
}
//===----------------------------------------------------------------------===//
class R600TargetMachine final : public AMDGPUTargetMachine {
+private:
+ R600Subtarget Subtarget;
public:
R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
CodeGenOpt::Level OL);
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
+  // One statically constructed subtarget per target machine; the Function
+  // overload ignores its argument and returns that same subtarget.
+  const R600Subtarget *getSubtargetImpl() const {
+    return &Subtarget;
+  }
+
+  const R600Subtarget *getSubtargetImpl(const Function &) const override {
+    return &Subtarget;
+  }
};
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
class GCNTargetMachine final : public AMDGPUTargetMachine {
+private:
+ SISubtarget Subtarget;
public:
GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
CodeGenOpt::Level OL);
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
+  // One statically constructed subtarget per target machine; the Function
+  // overload ignores its argument and returns that same subtarget.
+  const SISubtarget *getSubtargetImpl() const {
+    return &Subtarget;
+  }
+
+  const SISubtarget *getSubtargetImpl(const Function &) const override {
+    return &Subtarget;
+  }
};
+// Defined after both final subclasses so they are complete types here. The
+// triple's architecture (amdgcn vs. r600) determines which subclass this
+// really is, making the static_cast safe.
+inline const AMDGPUSubtarget *AMDGPUTargetMachine::getSubtargetImpl() const {
+  if (getTargetTriple().getArch() == Triple::amdgcn)
+    return static_cast<const GCNTargetMachine *>(this)->getSubtargetImpl();
+  return static_cast<const R600TargetMachine *>(this)->getSubtargetImpl();
+}
+
+inline const AMDGPUSubtarget *AMDGPUTargetMachine::getSubtargetImpl(
+    const Function &F) const {
+  if (getTargetTriple().getArch() == Triple::amdgcn)
+    return static_cast<const GCNTargetMachine *>(this)->getSubtargetImpl(F);
+  return static_cast<const R600TargetMachine *>(this)->getSubtargetImpl(F);
+}
+
} // End namespace llvm
#endif
bool prepare();
bool runOnMachineFunction(MachineFunction &MF) override {
- TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ TII = MF.getSubtarget<R600Subtarget>().getInstrInfo();
TRI = &TII->getRegisterInfo();
DEBUG(MF.dump(););
OrderedBlks.clear();
R600ControlFlowFinalizer.cpp
R600EmitClauseMarkers.cpp
R600ExpandSpecialInstrs.cpp
+ R600FrameLowering.cpp
R600InstrInfo.cpp
R600ISelLowering.cpp
R600MachineFunctionInfo.cpp
GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
  CurrCycleInstr(nullptr),
-  MF(MF) {
+  MF(MF),
+  // Cache the subtarget once so the per-instruction hazard checks can use
+  // the ST member instead of refetching it from MF on every query.
+  ST(MF.getSubtarget<SISubtarget>()) {
  MaxLookAhead = 5;
}
if (!CurrCycleInstr)
return;
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(MF.getSubtarget().getInstrInfo());
+ const SIInstrInfo *TII = ST.getInstrInfo();
unsigned NumWaitStates = TII->getNumWaitStates(*CurrCycleInstr);
// Keep track of emitted instructions
int GCNHazardRecognizer::getWaitStatesSinceDef(
unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef) {
- const TargetRegisterInfo *TRI =
- MF.getSubtarget<AMDGPUSubtarget>().getRegisterInfo();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
int WaitStates = -1;
for (MachineInstr *MI : EmittedInstrs) {
}
int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) {
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
-
// SMEM soft clause are only present on VI+
- if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
return 0;
// A soft-clause is any group of consecutive SMEM instructions. The
}
int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
-  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
-  const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
+  // Use the SISubtarget cached in the ST member (initialized in the
+  // constructor) rather than refetching it from MF, matching the other
+  // hazard checks (checkVMEMHazards, checkDPPHazards) in this change.
+  const SIInstrInfo *TII = ST.getInstrInfo();
  int WaitStatesNeeded = 0;
  WaitStatesNeeded = checkSMEMSoftClauseHazards(SMRD);
  // This SMRD hazard only affects SI.
-  if (ST.getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS)
+  if (ST.getGeneration() != SISubtarget::SOUTHERN_ISLANDS)
    return WaitStatesNeeded;
  // A read of an SGPR by SMRD instruction requires 4 wait states when the
}
int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
- const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
+ const SIInstrInfo *TII = ST.getInstrInfo();
- if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
return 0;
const SIRegisterInfo &TRI = TII->getRegisterInfo();
}
int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo*>(ST.getRegisterInfo());
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
// Check for DPP VGPR read after VALU VGPR write.
int DppVgprWaitStates = 2;
class MachineInstr;
class ScheduleDAG;
class SIInstrInfo;
+class SISubtarget;
class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
-
- // This variable stores the instruction that has been emitted this cycle.
- // It will be added to EmittedInstrs, when AdvanceCycle() or RecedeCycle() is
+ // This variable stores the instruction that has been emitted this cycle. It
+ // will be added to EmittedInstrs, when AdvanceCycle() or RecedeCycle() is
// called.
MachineInstr *CurrCycleInstr;
std::list<MachineInstr*> EmittedInstrs;
const MachineFunction &MF;
+ const SISubtarget &ST;
int getWaitStatesSinceDef(unsigned Reg,
function_ref<bool(MachineInstr *)> IsHazardDef =
if (skipFunction(*MF.getFunction()))
return false;
- TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
+ TII = ST.getInstrInfo();
+
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
MachineBasicBlock &MBB = *BB;
FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
};
- const AMDGPUSubtarget *ST;
+ const R600Subtarget *ST;
std::vector<StackItem> BranchStack;
std::vector<StackItem> LoopStack;
unsigned MaxStackSize;
unsigned CurrentEntries;
unsigned CurrentSubEntries;
- CFStack(const AMDGPUSubtarget *st, CallingConv::ID cc) : ST(st),
+ CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
// We need to reserve a stack entry for CALL_FS in vertex shaders.
MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0),
CurrentEntries(0), CurrentSubEntries(0) { }
return 0;
case CFStack::FIRST_NON_WQM_PUSH:
assert(!ST->hasCaymanISA());
- if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
+ if (ST->getGeneration() <= R600Subtarget::R700) {
// +1 For the push operation.
// +2 Extra space required.
return 3;
return 2;
}
case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
- assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
+ assert(ST->getGeneration() >= R600Subtarget::EVERGREEN);
// +1 For the push operation.
// +1 Extra space required.
return 2;
// See comment in
// CFStack::getSubEntrySize()
else if (CurrentEntries > 0 &&
- ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
+ ST->getGeneration() > R600Subtarget::EVERGREEN &&
!ST->hasCaymanISA() &&
!branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
const R600InstrInfo *TII;
const R600RegisterInfo *TRI;
unsigned MaxFetchInst;
- const AMDGPUSubtarget *ST;
+ const R600Subtarget *ST;
bool IsTrivialInst(MachineInstr *MI) const {
switch (MI->getOpcode()) {
const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
unsigned Opcode = 0;
- bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
+ bool isEg = (ST->getGeneration() >= R600Subtarget::EVERGREEN);
switch (CFI) {
case CF_TC:
Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
: MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), ST(nullptr) {}
bool runOnMachineFunction(MachineFunction &MF) override {
- ST = &MF.getSubtarget<AMDGPUSubtarget>();
+ ST = &MF.getSubtarget<R600Subtarget>();
MaxFetchInst = ST->getTexVTXClauseSize();
- TII = static_cast<const R600InstrInfo *>(ST->getInstrInfo());
- TRI = static_cast<const R600RegisterInfo *>(ST->getRegisterInfo());
+ TII = ST->getInstrInfo();
+ TRI = ST->getRegisterInfo();
+
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
CFStack CFStack(ST, MF.getFunction()->getCallingConv());
}
bool runOnMachineFunction(MachineFunction &MF) override {
- TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
+ TII = ST.getInstrInfo();
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
namespace {
class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
-
private:
static char ID;
const R600InstrInfo *TII;
}
bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
+ TII = ST.getInstrInfo();
const R600RegisterInfo &TRI = TII->getRegisterInfo();
--- /dev/null
+//===----------------------- R600FrameLowering.cpp ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+
+#include "R600FrameLowering.h"
+
+using namespace llvm;
+
+// Out-of-line destructor definition; presumably serves as the anchor that
+// ties emission of R600FrameLowering's vtable to this translation unit
+// (NOTE(review): assumed intent per usual LLVM practice — confirm).
+R600FrameLowering::~R600FrameLowering() {
+}
--- /dev/null
+//===--------------------- R600FrameLowering.h ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_R600FRAMELOWERING_H
+#define LLVM_LIB_TARGET_AMDGPU_R600FRAMELOWERING_H
+
+#include "AMDGPUFrameLowering.h"
+
+namespace llvm {
+
+/// Frame lowering for R600-generation targets. Both prologue/epilogue hooks
+/// are deliberately empty; the destructor is defined out-of-line in
+/// R600FrameLowering.cpp.
+class R600FrameLowering : public AMDGPUFrameLowering {
+public:
+  R600FrameLowering(StackDirection D, unsigned StackAl, int LAO,
+                    unsigned TransAl = 1) :
+    AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {}
+  ~R600FrameLowering() override;
+
+  // Mark the TargetFrameLowering hooks 'override' so any signature drift in
+  // the base class is caught at compile time, consistent with the rest of
+  // this change.
+  void emitPrologue(MachineFunction &MF,
+                    MachineBasicBlock &MBB) const override {}
+  void emitEpilogue(MachineFunction &MF,
+                    MachineBasicBlock &MBB) const override {}
+};
+
+} // End namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_R600FRAMELOWERING_H
using namespace llvm;
-R600TargetLowering::R600TargetLowering(TargetMachine &TM,
- const AMDGPUSubtarget &STI)
+R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
+ const R600Subtarget &STI)
: AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
}
+// The base class stores the subtarget as a plain AMDGPUSubtarget; an
+// R600TargetLowering is only ever constructed with an R600Subtarget (see the
+// constructor above), so the downcast is safe.
+const R600Subtarget *R600TargetLowering::getSubtarget() const {
+  return static_cast<const R600Subtarget *>(Subtarget);
+}
+
static inline bool isEOP(MachineBasicBlock::iterator I) {
return std::next(I)->getOpcode() == AMDGPU::RETURN;
}
MachineFunction * MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock::iterator I = *MI;
- const R600InstrInfo *TII =
- static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
+ const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
switch (MI->getOpcode()) {
default:
SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
DAG.getNode(ISD::FADD, DL, VT, FractPart,
DAG.getConstantFP(-0.5, DL, MVT::f32)));
- if (Gen >= AMDGPUSubtarget::R700)
+ if (Gen >= R600Subtarget::R700)
return TrigVal;
// On R600 hw, COS/SIN input must be between -Pi and Pi.
return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
// Lowering for indirect addressing
const MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL =
- static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
+ const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
unsigned StackWidth = TFL->getStackWidth(MF);
Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
// Lowering for indirect addressing
const MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL =
- static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
+ const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
unsigned StackWidth = TFL->getStackWidth(MF);
Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL = Subtarget->getFrameLowering();
+ const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
-static bool
-FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
- SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
- const R600InstrInfo *TII =
- static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
+bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
+ SDValue &Src, SDValue &Neg, SDValue &Abs,
+ SDValue &Sel, SDValue &Imm,
+ SelectionDAG &DAG) const {
+ const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
if (!Src.isMachineOpcode())
return false;
+
switch (Src.getMachineOpcode()) {
case AMDGPU::FNEG_R600:
if (!Neg.getNode())
}
}
-
/// \brief Fold the instructions after selecting them
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
SelectionDAG &DAG) const {
- const R600InstrInfo *TII =
- static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
+ const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
if (!Node->isMachineOpcode())
return Node;
+
unsigned Opcode = Node->getMachineOpcode();
SDValue FakeOp;
namespace llvm {
class R600InstrInfo;
+class R600Subtarget;
class R600TargetLowering final : public AMDGPUTargetLowering {
public:
- R600TargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI);
+ R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI);
+
+ const R600Subtarget *getSubtarget() const;
+
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock * BB) const override;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
bool isHWTrueValue(SDValue Op) const;
bool isHWFalseValue(SDValue Op) const;
+ bool FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src,
+ SDValue &Neg, SDValue &Abs, SDValue &Sel, SDValue &Imm,
+ SelectionDAG &DAG) const;
+
SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
};
#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenDFAPacketizer.inc"
-R600InstrInfo::R600InstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUInstrInfo(st), RI() {}
-
-const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
- return RI;
-}
+R600InstrInfo::R600InstrInfo(const R600Subtarget &ST)
+ : AMDGPUInstrInfo(ST), RI(), ST(ST) {}
bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
}
bool R600InstrInfo::isMov(unsigned Opcode) const {
-
-
switch(Opcode) {
- default: return false;
+ default:
+ return false;
case AMDGPU::MOV:
case AMDGPU::MOV_IMM_F32:
case AMDGPU::MOV_IMM_I32:
DFAPacketizer *
R600InstrInfo::CreateTargetScheduleState(const TargetSubtargetInfo &STI) const {
const InstrItineraryData *II = STI.getInstrItineraryData();
- return static_cast<const AMDGPUSubtarget &>(STI).createDFAPacketizer(II);
+ return static_cast<const R600Subtarget &>(STI).createDFAPacketizer(II);
}
static bool
void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
const MachineFunction &MF) const {
- const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
- MF.getSubtarget().getFrameLowering());
+ const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
+ const R600FrameLowering *TFL = ST.getFrameLowering();
unsigned StackWidth = TFL->getStackWidth(MF);
int End = getIndirectIndexEnd(MF);
const {
assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
unsigned Opcode;
- if (ST.getGeneration() <= AMDGPUSubtarget::R700)
+ if (ST.getGeneration() <= R600Subtarget::R700)
Opcode = AMDGPU::DOT4_r600;
else
Opcode = AMDGPU::DOT4_eg;
#define LLVM_LIB_TARGET_AMDGPU_R600INSTRINFO_H
#include "AMDGPUInstrInfo.h"
-#include "R600Defines.h"
#include "R600RegisterInfo.h"
namespace llvm {
- class AMDGPUTargetMachine;
- class DFAPacketizer;
- class MachineFunction;
- class MachineInstr;
- class MachineInstrBuilder;
-
- class R600InstrInfo final : public AMDGPUInstrInfo {
- private:
+class AMDGPUTargetMachine;
+class DFAPacketizer;
+class MachineFunction;
+class MachineInstr;
+class MachineInstrBuilder;
+class R600Subtarget;
+
+class R600InstrInfo final : public AMDGPUInstrInfo {
+private:
const R600RegisterInfo RI;
+ const R600Subtarget &ST;
- std::vector<std::pair<int, unsigned> >
- ExtractSrcs(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PV, unsigned &ConstCount) const;
-
+ std::vector<std::pair<int, unsigned>>
+ ExtractSrcs(MachineInstr *MI,
+ const DenseMap<unsigned, unsigned> &PV,
+ unsigned &ConstCount) const;
MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
unsigned AddrChan) const;
MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned ValueReg, unsigned Address,
- unsigned OffsetReg,
- unsigned AddrChan) const;
- public:
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg,
+ unsigned AddrChan) const;
+public:
enum BankSwizzle {
ALU_VEC_012_SCL_210 = 0,
ALU_VEC_021_SCL_122,
ALU_VEC_210
};
- explicit R600InstrInfo(const AMDGPUSubtarget &st);
+ explicit R600InstrInfo(const R600Subtarget &);
+
+ const R600RegisterInfo &getRegisterInfo() const {
+ return RI;
+ }
- const R600RegisterInfo &getRegisterInfo() const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
/// If register is ALU_LITERAL, second member is IMM.
/// Otherwise, second member value is undefined.
SmallVector<std::pair<MachineOperand *, int64_t>, 3>
- getSrcs(MachineInstr *MI) const;
+ getSrcs(MachineInstr *MI) const;
unsigned isLegalUpTo(
const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
DFAPacketizer *
CreateTargetScheduleState(const TargetSubtargetInfo &) const override;
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+ bool ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const override;
- bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const override;
+ bool AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const override;
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
bool isPredicable(MachineInstr &MI) const override;
- bool
- isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
- BranchProbability Probability) const override;
+ bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+ BranchProbability Probability) const override;
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
unsigned ExtraPredCycles,
BranchProbability Probability) const override ;
- bool
- isProfitableToIfCvt(MachineBasicBlock &TMBB,
- unsigned NumTCycles, unsigned ExtraTCycles,
- MachineBasicBlock &FMBB,
- unsigned NumFCycles, unsigned ExtraFCycles,
- BranchProbability Probability) const override;
+ bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles, unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles, unsigned ExtraFCycles,
+ BranchProbability Probability) const override;
bool DefinesPredicate(MachineInstr &MI,
std::vector<MachineOperand> &Pred) const override;
ArrayRef<MachineOperand> Pred2) const override;
bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
- MachineBasicBlock &FMBB) const override;
+ MachineBasicBlock &FMBB) const override;
bool PredicateInstruction(MachineInstr &MI,
ArrayRef<MachineOperand> Pred) const override;
unsigned getMaxAlusPerClause() const;
- ///buildDefaultInstruction - This function returns a MachineInstr with
- /// all the instruction modifiers initialized to their default values.
- /// You can use this function to avoid manually specifying each instruction
- /// modifier operand when building a new instruction.
+ /// buildDefaultInstruction - This function returns a MachineInstr with all
+ /// the instruction modifiers initialized to their default values. You can
+ /// use this function to avoid manually specifying each instruction modifier
+ /// operand when building a new instruction.
///
/// \returns a MachineInstr with all the instruction modifiers initialized
/// to their default values.
unsigned DstReg) const;
MachineInstr *buildMovImm(MachineBasicBlock &BB,
- MachineBasicBlock::iterator I,
- unsigned DstReg,
- uint64_t Imm) const;
+ MachineBasicBlock::iterator I,
+ unsigned DstReg,
+ uint64_t Imm) const;
MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
// Helper functions that check the opcode for status information
bool isRegisterStore(const MachineInstr &MI) const;
bool isRegisterLoad(const MachineInstr &MI) const;
-
};
namespace AMDGPU {
def load_param_exti8 : LoadParamFrag<az_extloadi8>;
def load_param_exti16 : LoadParamFrag<az_extloadi16>;
-def isR600 : Predicate<"Subtarget->getGeneration() <= AMDGPUSubtarget::R700">;
+def isR600 : Predicate<"Subtarget->getGeneration() <= R600Subtarget::R700">;
def isR600toCayman
: Predicate<
- "Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS">;
+ "Subtarget->getGeneration() <= R600Subtarget::NORTHERN_ISLANDS">;
//===----------------------------------------------------------------------===//
// R600 SDNodes
//===----------------------------------------------------------------------===//
#include "R600MachineScheduler.h"
+#include "R600InstrInfo.h"
#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Pass.h"
void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
assert(dag->hasVRegLiveness() && "R600SchedStrategy needs vreg liveness");
DAG = static_cast<ScheduleDAGMILive*>(dag);
- const AMDGPUSubtarget &ST = DAG->MF.getSubtarget<AMDGPUSubtarget>();
+ const R600Subtarget &ST = DAG->MF.getSubtarget<R600Subtarget>();
TII = static_cast<const R600InstrInfo*>(DAG->TII);
TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
VLIW5 = !ST.hasCaymanISA();
QSrc.clear();
}
-static
-unsigned getWFCountLimitedByGPR(unsigned GPRCount) {
+static unsigned getWFCountLimitedByGPR(unsigned GPRCount) {
assert (GPRCount && "GPRCount cannot be 0");
return 248 / GPRCount;
}
DEBUG(dbgs() << "New Slot\n");
assert (OccupedSlotsMask && "Slot wasn't filled");
OccupedSlotsMask = 0;
-// if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS)
+// if (HwGen == R600Subtarget::NORTHERN_ISLANDS)
// OccupedSlotsMask |= 16;
InstructionsGroupCandidate.clear();
LoadAlu();
#ifndef LLVM_LIB_TARGET_AMDGPU_R600MACHINESCHEDULER_H
#define LLVM_LIB_TARGET_AMDGPU_R600MACHINESCHEDULER_H
-#include "R600InstrInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
using namespace llvm;
namespace llvm {
-class R600SchedStrategy final : public MachineSchedStrategy {
+class R600InstrInfo;
+struct R600RegisterInfo;
+class R600SchedStrategy final : public MachineSchedStrategy {
const ScheduleDAGMILive *DAG;
const R600InstrInfo *TII;
const R600RegisterInfo *TRI;
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
+#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineDominators.h"
if (skipFunction(*Fn.getFunction()))
return false;
- TII = static_cast<const R600InstrInfo *>(Fn.getSubtarget().getInstrInfo());
- MRI = &(Fn.getRegInfo());
+ const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>();
+ TII = ST.getInstrInfo();
+ MRI = &Fn.getRegInfo();
+
for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
MBB != MBBe; ++MBB) {
MachineBasicBlock *MB = &*MBB;
char R600Packetizer::ID = 0;
class R600PacketizerList : public VLIWPacketizerList {
-
private:
const R600InstrInfo *TII;
const R600RegisterInfo &TRI;
}
public:
// Ctor.
- R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI)
+ R600PacketizerList(MachineFunction &MF, const R600Subtarget &ST,
+ MachineLoopInfo &MLI)
: VLIWPacketizerList(MF, MLI, nullptr),
- TII(static_cast<const R600InstrInfo *>(
- MF.getSubtarget().getInstrInfo())),
+ TII(ST.getInstrInfo()),
TRI(TII->getRegisterInfo()) {
- VLIW5 = !MF.getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
+ VLIW5 = !ST.hasCaymanISA();
}
// initPacketizerState - initialize some internal flags.
};
bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
- const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
+ const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>();
+ const R600InstrInfo *TII = ST.getInstrInfo();
+
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
// Instantiate the packetizer.
- R600PacketizerList Packetizer(Fn, MLI);
+ R600PacketizerList Packetizer(Fn, ST, MLI);
// DFA state table should not be empty.
assert(Packetizer.getResourceTracker() && "Empty DFA table!");
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const R600InstrInfo *TII =
- static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
+ const R600InstrInfo *TII = ST.getInstrInfo();
Reserved.set(AMDGPU::ZERO);
Reserved.set(AMDGPU::HALF);
/// \brief get the HW encoding for a register's channel.
unsigned getHWRegChan(unsigned reg) const;
- unsigned getHWRegIndex(unsigned Reg) const override;
+ unsigned getHWRegIndex(unsigned Reg) const;
/// \brief get the register class of the specified type to use in the
/// CFGStructurizer
//===----------------------------------------------------------------------===//
#include "SIInstrInfo.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
bool SIDebuggerInsertNops::runOnMachineFunction(MachineFunction &MF) {
// Skip this pass if "amdgpu-debugger-insert-nops" attribute was not
// specified.
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
if (!ST.debuggerInsertNops())
return false;
return false;
// Target instruction info.
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(MF.getSubtarget().getInstrInfo());
+ const SIInstrInfo *TII = ST.getInstrInfo();
// Set containing line numbers that have nop inserted.
DenseSet<unsigned> NopInserted;
namespace AMDGPU {
enum OperandType {
/// Operand with register or 32-bit immediate
- OPERAND_REG_IMM32 = llvm::MCOI::OPERAND_FIRST_TARGET,
+ OPERAND_REG_IMM32 = MCOI::OPERAND_FIRST_TARGET,
/// Operand with register or inline constant
OPERAND_REG_INLINE_C
};
}
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const SIInstrInfo *TII = ST.getInstrInfo();
SmallVector<MachineInstr *, 16> Worklist;
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
if (skipFunction(*MF.getFunction()))
return false;
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+
MachineRegisterInfo &MRI = MF.getRegInfo();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
+#include "AMDGPUSubtarget.h"
+
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
if (hasOnlySGPRSpills(MFI, MF.getFrameInfo()))
return;
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo *TRI = &TII->getRegisterInfo();
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
MachineBasicBlock::iterator I = MBB.begin();
}
}
+void SIFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+
+}
+
void SIFrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF,
RegScavenger *RS) const {
void emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const override;
+ void emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const override;
void processFunctionBeforeFrameFinalized(
MachineFunction &MF,
llvm_unreachable("Cannot allocate sgpr");
}
-SITargetLowering::SITargetLowering(TargetMachine &TM,
- const AMDGPUSubtarget &STI)
+SITargetLowering::SITargetLowering(const TargetMachine &TM,
+ const SISubtarget &STI)
: AMDGPUTargetLowering(TM, STI) {
addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Expand);
- if (Subtarget->hasFlatAddressSpace()) {
+ if (getSubtarget()->hasFlatAddressSpace()) {
setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
}
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
+ if (Subtarget->getGeneration() >= SISubtarget::SEA_ISLANDS) {
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
setSchedulingPreference(Sched::RegPressure);
}
+const SISubtarget *SITargetLowering::getSubtarget() const {
+ return static_cast<const SISubtarget *>(Subtarget);
+}
+
//===----------------------------------------------------------------------===//
// TargetLowering queries
//===----------------------------------------------------------------------===//
switch (AS) {
case AMDGPUAS::GLOBAL_ADDRESS: {
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
// Assume that we will use FLAT for all global memory accesses
// on VI.
// FIXME: This assumption is currently wrong. On VI we still use
if (DL.getTypeStoreSize(Ty) < 4)
return isLegalMUBUFAddressingMode(AM);
- if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS) {
// SMRD instructions have an 8-bit, dword offset on SI.
if (!isUInt<8>(AM.BaseOffs / 4))
return false;
- } else if (Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS) {
+ } else if (Subtarget->getGeneration() == SISubtarget::SEA_ISLANDS) {
// On CI+, this can also be a 32-bit literal constant offset. If it fits
// in 8-bits, it can use a smaller encoding.
if (!isUInt<32>(AM.BaseOffs / 4))
return false;
- } else if (Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ } else if (Subtarget->getGeneration() == SISubtarget::VOLCANIC_ISLANDS) {
// On VI, these use the SMEM format and the offset is 20-bit in bytes.
if (!isUInt<20>(AM.BaseOffs))
return false;
bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
return TII->isInlineConstant(Imm);
}
unsigned Offset) const {
const DataLayout &DL = DAG.getDataLayout();
MachineFunction &MF = DAG.getMachineFunction();
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo());
+ const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
+ const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
FunctionType *FType = MF.getFunction()->getFunctionType();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
if (Subtarget->isAmdHsaOS() && AMDGPU::isShader(CallConv)) {
const Function *Fn = MF.getFunction();
auto *ParamTy =
dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
- if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
+ if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS &&
ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
// On SI local pointers are just offsets into LDS, so they are always
// less than 16-bits. On CI and newer they could potentially be
}
- if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
+ if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS &&
Subtarget->getRegisterInfo()->regsOverlap(Reg, AMDGPU::FLAT_SCR)) {
report_fatal_error(Twine("invalid register \""
+ StringRef(RegName) + "\" for subtarget."));
MachineInstr *MI, MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
case AMDGPU::SI_INIT_M0: {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
BuildMI(*BB, MI->getIterator(), MI->getDebugLoc(),
TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
.addOperand(MI->getOperand(0));
case AMDGPU::BRANCH:
return BB;
case AMDGPU::GET_GROUPSTATICSIZE: {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
+
MachineFunction *MF = BB->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
DebugLoc DL = MI->getDebugLoc();
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
auto MFI = MF.getInfo<SIMachineFunctionInfo>();
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
+ const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
EVT VT = Op.getValueType();
SDLoc DL(Op);
case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_rsq_legacy: {
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
return emitRemovedIntrinsicError(DAG, DL, VT);
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
}
case Intrinsic::amdgcn_rsq_clamp:
case AMDGPUIntrinsic::AMDGPU_rsq_clamped: { // Legacy name
- if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
Type *Type = VT.getTypeForEVT(*DAG.getContext());
return DAG.getNode(AMDGPUISD::COS_HW, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_log_clamp: {
- if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
return SDValue();
DiagnosticInfoUnsupported BadIntrin(
SDValue Scale;
- if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS) {
// Workaround a hardware bug on SI where the condition output from div_scale
// is not usable.
/// \brief Return true if the given offset Size in bytes can be folded into
/// the immediate offsets of a memory instruction for the given address space.
static bool canFoldOffset(unsigned OffsetSize, unsigned AS,
- const AMDGPUSubtarget &STI) {
+ const SISubtarget &STI) {
switch (AS) {
case AMDGPUAS::GLOBAL_ADDRESS: {
// MUBUF instructions have a 12-bit offset in bytes.
case AMDGPUAS::CONSTANT_ADDRESS: {
// SMRD instructions have an 8-bit offset in dwords on SI and
// a 20-bit offset in bytes on VI.
- if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (STI.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
return isUInt<20>(OffsetSize);
else
return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
// If the resulting offset is too large, we can't fold it into the addressing
// mode offset.
APInt Offset = CAdd->getAPIntValue() << CN1->getAPIntValue();
- if (!canFoldOffset(Offset.getZExtValue(), AddrSpace, *Subtarget))
+ if (!canFoldOffset(Offset.getZExtValue(), AddrSpace, *getSubtarget()))
return SDValue();
SelectionDAG &DAG = DCI.DAG;
/// Returns -1 if it isn't an immediate, 0 if it's an inline immediate
/// and the immediate value if it's a literal immediate
int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
-
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N)) {
if (TII->isInlineConstant(Node->getAPIntValue()))
/// \brief Fold the instructions after selecting them.
SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
SelectionDAG &DAG) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
unsigned Opcode = Node->getMachineOpcode();
if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore())
/// bits set in the writemask
void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
const SDLoc &DL,
SDValue Ptr) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
// Build the half of the subregister with the constants before building the
// full 128-bit register. If we are building multiple resource descriptors,
bool isCFIntrinsic(const SDNode *Intr) const;
public:
- SITargetLowering(TargetMachine &tm, const AMDGPUSubtarget &STI);
+ SITargetLowering(const TargetMachine &tm, const SISubtarget &STI);
+
+ const SISubtarget *getSubtarget() const;
bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
unsigned IntrinsicID) const override;
class SIInsertWaits : public MachineFunctionPass {
private:
+ const SISubtarget *ST;
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
const MachineRegisterInfo *MRI;
SIInsertWaits() :
MachineFunctionPass(ID),
+ ST(nullptr),
TII(nullptr),
TRI(nullptr),
ExpInstrTypesSeen(0),
return;
}
- if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
- AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
// Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
// or SMEM clause, respectively.
//
void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
- if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <
- AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (ST->getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
return;
// There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
bool Changes = false;
- TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
- TRI =
- static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
-
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ ST = &MF.getSubtarget<SISubtarget>();
+ TII = ST->getInstrInfo();
+ TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
WaitedOn = ZeroCounts;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
- if (ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) {
+ if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS) {
// There is a hardware bug on CI/SI where SMRD instruction may corrupt
// vccz bit, so when we detect that an instruction may read from a
// corrupt vccz bit, we need to:
using namespace llvm;
-SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUInstrInfo(st), RI() {}
+SIInstrInfo::SIInstrInfo(const SISubtarget &ST)
+ : AMDGPUInstrInfo(ST), RI(), ST(ST) {}
//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
unsigned Size) const {
MachineFunction *MF = MBB.getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>();
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo*>(ST.getRegisterInfo());
+ const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
DebugLoc DL = MBB.findDebugLoc(MI);
unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF);
unsigned WavefrontSize = ST.getWavefrontSize();
}
case AMDGPU::SI_PC_ADD_REL_OFFSET: {
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo *>(ST.getRegisterInfo());
+ const SIRegisterInfo *TRI
+ = static_cast<const SIRegisterInfo *>(ST.getRegisterInfo());
MachineFunction &MF = *MBB.getParent();
unsigned Reg = MI->getOperand(0).getReg();
unsigned RegLo = TRI->getSubReg(Reg, AMDGPU::sub0);
// Target-independent instructions do not have an implicit-use of EXEC, even
// when they operate on VGPRs. Treating EXEC modifications as scheduling
// boundaries prevents incorrect movements of such instructions.
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
if (MI->modifiesRegister(AMDGPU::EXEC, TRI))
return true;
} else {
// This instruction is the _OFFSET variant, so we need to convert it to
// ADDR64.
- assert(MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration()
- < AMDGPUSubtarget::VOLCANIC_ISLANDS &&
+ assert(MBB.getParent()->getSubtarget<SISubtarget>().getGeneration()
+ < SISubtarget::VOLCANIC_ISLANDS &&
"FIXME: Need to emit flat atomics here");
MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
}
case AMDGPU::S_LSHL_B32:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
swapOperands(Inst);
}
break;
case AMDGPU::S_ASHR_I32:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
swapOperands(Inst);
}
break;
case AMDGPU::S_LSHR_B32:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
swapOperands(Inst);
}
break;
case AMDGPU::S_LSHL_B64:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
NewOpcode = AMDGPU::V_LSHLREV_B64;
swapOperands(Inst);
}
break;
case AMDGPU::S_ASHR_I64:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
NewOpcode = AMDGPU::V_ASHRREV_I64;
swapOperands(Inst);
}
break;
case AMDGPU::S_LSHR_B64:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
NewOpcode = AMDGPU::V_LSHRREV_B64;
swapOperands(Inst);
}
if (ST.isAmdHsaOS()) {
RsrcDataFormat |= (1ULL << 56);
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
// Set MTYPE = 2
RsrcDataFormat |= (2ULL << 59);
}
// If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
// Clear them unless we want a huge stride.
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
return Rsrc23;
class SIInstrInfo final : public AMDGPUInstrInfo {
private:
const SIRegisterInfo RI;
+ const SISubtarget &ST;
// The inverse predicate should have the negative value.
enum BranchPredicate {
unsigned OpIdx1) const override;
public:
- explicit SIInstrInfo(const AMDGPUSubtarget &st);
+ explicit SIInstrInfo(const SISubtarget &);
- const SIRegisterInfo &getRegisterInfo() const override {
+ const SIRegisterInfo &getRegisterInfo() const {
return RI;
}
//
//===----------------------------------------------------------------------===//
def isCI : Predicate<"Subtarget->getGeneration() "
- ">= AMDGPUSubtarget::SEA_ISLANDS">;
+ ">= SISubtarget::SEA_ISLANDS">;
def isCIOnly : Predicate<"Subtarget->getGeneration() =="
- "AMDGPUSubtarget::SEA_ISLANDS">,
+ "SISubtarget::SEA_ISLANDS">,
AssemblerPredicate <"FeatureSeaIslands">;
def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
field bits<8> VI = vi;
}
-// Execpt for the NONE field, this must be kept in sync with the SISubtarget enum
-// in AMDGPUInstrInfo.cpp
-def SISubtarget {
+// Except for the NONE field, this must be kept in sync with the
+// SIEncodingFamily enum in AMDGPUInstrInfo.cpp
+def SIEncodingFamily {
int NONE = -1;
int SI = 0;
int VI = 1;
}]>;
class SGPRImm <dag frag> : PatLeaf<frag, [{
- if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ if (Subtarget->getGeneration() < SISubtarget::SOUTHERN_ISLANDS) {
return false;
}
const SIRegisterInfo *SIRI =
multiclass EXP_m {
let isPseudo = 1, isCodeGenOnly = 1 in {
- def "" : EXPCommon, SIMCInstr <"exp", SISubtarget.NONE> ;
+ def "" : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.NONE> ;
}
- def _si : EXPCommon, SIMCInstr <"exp", SISubtarget.SI>, EXPe {
+ def _si : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.SI>, EXPe {
let DecoderNamespace="SICI";
let DisableDecoder = DisableSIDecoder;
}
- def _vi : EXPCommon, SIMCInstr <"exp", SISubtarget.VI>, EXPe_vi {
+ def _vi : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.VI>, EXPe_vi {
let DecoderNamespace="VI";
let DisableDecoder = DisableVIDecoder;
}
class SOP1_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
SOP1 <outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE> {
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm> :
SOP1 <outs, ins, asm, []>,
SOP1e <op.SI>,
- SIMCInstr<opName, SISubtarget.SI> {
+ SIMCInstr<opName, SIEncodingFamily.SI> {
let isCodeGenOnly = 0;
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
class SOP1_Real_vi <sop1 op, string opName, dag outs, dag ins, string asm> :
SOP1 <outs, ins, asm, []>,
SOP1e <op.VI>,
- SIMCInstr<opName, SISubtarget.VI> {
+ SIMCInstr<opName, SIEncodingFamily.VI> {
let isCodeGenOnly = 0;
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
class SOP2_Pseudo<string opName, dag outs, dag ins, list<dag> pattern> :
SOP2<outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE> {
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let Size = 4;
class SOP2_Real_si<sop2 op, string opName, dag outs, dag ins, string asm> :
SOP2<outs, ins, asm, []>,
SOP2e<op.SI>,
- SIMCInstr<opName, SISubtarget.SI> {
+ SIMCInstr<opName, SIEncodingFamily.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
class SOP2_Real_vi<sop2 op, string opName, dag outs, dag ins, string asm> :
SOP2<outs, ins, asm, []>,
SOP2e<op.VI>,
- SIMCInstr<opName, SISubtarget.VI> {
+ SIMCInstr<opName, SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
class SOPK_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
SOPK <outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE> {
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
class SOPK_Real_si <sopk op, string opName, dag outs, dag ins, string asm> :
SOPK <outs, ins, asm, []>,
SOPKe <op.SI>,
- SIMCInstr<opName, SISubtarget.SI> {
+ SIMCInstr<opName, SIEncodingFamily.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
class SOPK_Real_vi <sopk op, string opName, dag outs, dag ins, string asm> :
SOPK <outs, ins, asm, []>,
SOPKe <op.VI>,
- SIMCInstr<opName, SISubtarget.VI> {
+ SIMCInstr<opName, SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
def _si : SOPK <outs, ins, asm, []>,
SOPK64e <op.SI>,
- SIMCInstr<opName, SISubtarget.SI> {
+ SIMCInstr<opName, SIEncodingFamily.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
def _vi : SOPK <outs, ins, asm, []>,
SOPK64e <op.VI>,
- SIMCInstr<opName, SISubtarget.VI> {
+ SIMCInstr<opName, SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
class SMRD_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
SMRD <outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE> {
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
string asm> :
SMRD <outs, ins, asm, []>,
SMRD_IMMe <op>,
- SIMCInstr<opName, SISubtarget.SI> {
+ SIMCInstr<opName, SIEncodingFamily.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
string asm> :
SMRD <outs, ins, asm, []>,
SMRD_SOFFe <op>,
- SIMCInstr<opName, SISubtarget.SI> {
+ SIMCInstr<opName, SIEncodingFamily.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
string asm, list<dag> pattern = []> :
SMRD <outs, ins, asm, pattern>,
SMEM_IMMe_vi <op>,
- SIMCInstr<opName, SISubtarget.VI> {
+ SIMCInstr<opName, SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
string asm, list<dag> pattern = []> :
SMRD <outs, ins, asm, pattern>,
SMEM_SOFFe_vi <op>,
- SIMCInstr<opName, SISubtarget.VI> {
+ SIMCInstr<opName, SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
class getAsmSDWA <bit HasDst, int NumSrcArgs, bit HasFloatModifiers,
ValueType DstVT = i32> {
- string dst = !if(HasDst,
+ string dst = !if(HasDst,
!if(!eq(DstVT.Size, 1),
"$sdst", // use $sdst for VOPC
"$vdst"),
string src0 = !if(HasFloatModifiers, "$src0_fmodifiers", "$src0_imodifiers");
string src1 = !if(HasFloatModifiers, "$src1_fmodifiers", "$src1_imodifiers");
string args = !if(!eq(NumSrcArgs, 0),
- "",
- !if(!eq(NumSrcArgs, 1),
+ "",
+ !if(!eq(NumSrcArgs, 1),
", "#src0#"$clamp",
", "#src0#", "#src1#"$clamp"
)
class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOP1Common <outs, ins, "", pattern>,
VOP <opName>,
- SIMCInstr <opName#"_e32", SISubtarget.NONE>,
+ SIMCInstr <opName#"_e32", SIEncodingFamily.NONE>,
MnemonicAlias<opName#"_e32", opName> {
let isPseudo = 1;
let isCodeGenOnly = 1;
class VOP1_Real_si <string opName, vop1 op, dag outs, dag ins, string asm> :
VOP1<op.SI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI> {
+ SIMCInstr <opName#"_e32", SIEncodingFamily.SI> {
let AssemblerPredicate = SIAssemblerPredicate;
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
class VOP1_Real_vi <string opName, vop1 op, dag outs, dag ins, string asm> :
VOP1<op.VI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI> {
+ SIMCInstr <opName#"_e32", SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOP2Common <outs, ins, "", pattern>,
VOP <opName>,
- SIMCInstr<opName#"_e32", SISubtarget.NONE>,
+ SIMCInstr<opName#"_e32", SIEncodingFamily.NONE>,
MnemonicAlias<opName#"_e32", opName> {
let isPseudo = 1;
let isCodeGenOnly = 1;
class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> :
VOP2 <op.SI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI> {
+ SIMCInstr <opName#"_e32", SIEncodingFamily.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> :
VOP2 <op.VI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI> {
+ SIMCInstr <opName#"_e32", SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
bit HasMods = 0, bit VOP3Only = 0> :
VOP3Common <outs, ins, "", pattern, HasMods, VOP3Only>,
VOP <opName>,
- SIMCInstr<opName#"_e64", SISubtarget.NONE>,
+ SIMCInstr<opName#"_e64", SIEncodingFamily.NONE>,
MnemonicAlias<opName#"_e64", opName> {
let isPseudo = 1;
let isCodeGenOnly = 1;
bit HasMods = 0, bit VOP3Only = 0> :
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
VOP3e <op>,
- SIMCInstr<opName#"_e64", SISubtarget.SI> {
+ SIMCInstr<opName#"_e64", SIEncodingFamily.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
bit HasMods = 0, bit VOP3Only = 0> :
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
VOP3e_vi <op>,
- SIMCInstr <opName#"_e64", SISubtarget.VI> {
+ SIMCInstr <opName#"_e64", SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
bit HasMods = 0, bit VOP3Only = 0> :
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
VOP3ce <op>,
- SIMCInstr<opName#"_e64", SISubtarget.SI> {
+ SIMCInstr<opName#"_e64", SIEncodingFamily.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
bit HasMods = 0, bit VOP3Only = 0> :
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
VOP3ce_vi <op>,
- SIMCInstr <opName#"_e64", SISubtarget.VI> {
+ SIMCInstr <opName#"_e64", SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
bit HasMods = 0, bit VOP3Only = 0> :
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
VOP3be <op>,
- SIMCInstr<opName#"_e64", SISubtarget.SI> {
+ SIMCInstr<opName#"_e64", SIEncodingFamily.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
bit HasMods = 0, bit VOP3Only = 0> :
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
VOP3be_vi <op>,
- SIMCInstr <opName#"_e64", SISubtarget.VI> {
+ SIMCInstr <opName#"_e64", SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
bit HasMods = 0, bit VOP3Only = 0> :
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
VOP3e <op>,
- SIMCInstr<opName#"_e64", SISubtarget.SI> {
+ SIMCInstr<opName#"_e64", SIEncodingFamily.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
bit HasMods = 0, bit VOP3Only = 0> :
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
VOP3e_vi <op>,
- SIMCInstr <opName#"_e64", SISubtarget.VI> {
+ SIMCInstr <opName#"_e64", SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
string asm, list<dag> pattern = []> {
let isPseudo = 1, isCodeGenOnly = 1 in {
def "" : VOPAnyCommon <outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE>;
+ SIMCInstr<opName, SIEncodingFamily.NONE>;
}
def _si : VOP2 <op.SI3{5-0}, outs, ins, asm, []>,
- SIMCInstr <opName, SISubtarget.SI> {
+ SIMCInstr <opName, SIEncodingFamily.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
def _vi : VOP3Common <outs, ins, asm, []>,
VOP3e_vi <op.VI3>,
VOP3DisableFields <1, 0, 0>,
- SIMCInstr <opName, SISubtarget.VI> {
+ SIMCInstr <opName, SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
let isCodeGenOnly = 0 in {
def _si : VOP2Common <P.Outs, P.Ins32,
!strconcat(opName, P.Asm32), []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>,
+ SIMCInstr <opName#"_e32", SIEncodingFamily.SI>,
VOP2_MADKe <op.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
def _vi : VOP2Common <P.Outs, P.Ins32,
!strconcat(opName, P.Asm32), []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI>,
+ SIMCInstr <opName#"_e32", SIEncodingFamily.VI>,
VOP2_MADKe <op.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
class VOPC_Pseudo <dag ins, list<dag> pattern, string opName> :
VOPCCommon <ins, "", pattern>,
VOP <opName>,
- SIMCInstr<opName#"_e32", SISubtarget.NONE> {
+ SIMCInstr<opName#"_e32", SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
let AssemblerPredicates = [isSICI] in {
def _si : VOPC<op.SI, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI> {
+ SIMCInstr <opName#"_e32", SIEncodingFamily.SI> {
let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
let hasSideEffects = DefExec;
let SchedRW = sched;
let AssemblerPredicates = [isVI] in {
def _vi : VOPC<op.VI, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI> {
+ SIMCInstr <opName#"_e32", SIEncodingFamily.VI> {
let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
let hasSideEffects = DefExec;
let SchedRW = sched;
class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
VINTRPCommon <outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE> {
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
string asm> :
VINTRPCommon <outs, ins, asm, []>,
VINTRPe <op>,
- SIMCInstr<opName, SISubtarget.SI> {
+ SIMCInstr<opName, SIEncodingFamily.SI> {
let AssemblerPredicate = SIAssemblerPredicate;
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
string asm> :
VINTRPCommon <outs, ins, asm, []>,
VINTRPe_vi <op>,
- SIMCInstr<opName, SISubtarget.VI> {
+ SIMCInstr<opName, SIEncodingFamily.VI> {
let AssemblerPredicate = VIAssemblerPredicate;
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
class DS_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
DS <outs, ins, "", pattern>,
- SIMCInstr <opName, SISubtarget.NONE> {
+ SIMCInstr <opName, SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
class DS_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
DS <outs, ins, asm, []>,
DSe <op>,
- SIMCInstr <opName, SISubtarget.SI> {
+ SIMCInstr <opName, SIEncodingFamily.SI> {
let isCodeGenOnly = 0;
let AssemblerPredicates = [isSICI];
let DecoderNamespace="SICI";
class DS_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
DS <outs, ins, asm, []>,
DSe_vi <op>,
- SIMCInstr <opName, SISubtarget.VI> {
+ SIMCInstr <opName, SIEncodingFamily.VI> {
let isCodeGenOnly = 0;
let AssemblerPredicates = [isVI];
let DecoderNamespace="VI";
class MTBUF_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
MTBUF <outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE> {
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
string asm> :
MTBUF <outs, ins, asm, []>,
MTBUFe <op>,
- SIMCInstr<opName, SISubtarget.SI> {
+ SIMCInstr<opName, SIEncodingFamily.SI> {
let DecoderNamespace="SICI";
let DisableDecoder = DisableSIDecoder;
}
class MTBUF_Real_vi <bits<4> op, string opName, dag outs, dag ins, string asm> :
MTBUF <outs, ins, asm, []>,
MTBUFe_vi <op>,
- SIMCInstr <opName, SISubtarget.VI> {
+ SIMCInstr <opName, SIEncodingFamily.VI> {
let DecoderNamespace="VI";
let DisableDecoder = DisableVIDecoder;
}
class MUBUF_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
MUBUF <outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE> {
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
string asm> :
MUBUF <outs, ins, asm, []>,
MUBUFe <op.SI>,
- SIMCInstr<opName, SISubtarget.SI> {
+ SIMCInstr<opName, SIEncodingFamily.SI> {
let lds = 0;
let AssemblerPredicate = SIAssemblerPredicate;
let DecoderNamespace="SICI";
string asm> :
MUBUF <outs, ins, asm, []>,
MUBUFe_vi <op.VI>,
- SIMCInstr<opName, SISubtarget.VI> {
+ SIMCInstr<opName, SIEncodingFamily.VI> {
let lds = 0;
let AssemblerPredicate = VIAssemblerPredicate;
let DecoderNamespace="VI";
class FLAT_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
FLAT <0, outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE> {
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
class FLAT_Real_ci <bits<7> op, string opName, dag outs, dag ins, string asm> :
FLAT <op, outs, ins, asm, []>,
- SIMCInstr<opName, SISubtarget.SI> {
+ SIMCInstr<opName, SIEncodingFamily.SI> {
let AssemblerPredicate = isCIOnly;
let DecoderNamespace="CI";
}
class FLAT_Real_vi <bits<7> op, string opName, dag outs, dag ins, string asm> :
FLAT <op, outs, ins, asm, []>,
- SIMCInstr<opName, SISubtarget.VI> {
+ SIMCInstr<opName, SIEncodingFamily.VI> {
let AssemblerPredicate = VIAssemblerPredicate;
let DecoderNamespace="VI";
let DisableDecoder = DisableVIDecoder;
class MIMG_Atomic_Real_si<mimg op, string name, string asm,
RegisterClass data_rc, RegisterClass addr_rc> :
MIMG_Atomic_Helper<asm, data_rc, addr_rc>,
- SIMCInstr<name, SISubtarget.SI>,
+ SIMCInstr<name, SIEncodingFamily.SI>,
MIMGe<op.SI> {
let isCodeGenOnly = 0;
let AssemblerPredicates = [isSICI];
class MIMG_Atomic_Real_vi<mimg op, string name, string asm,
RegisterClass data_rc, RegisterClass addr_rc> :
MIMG_Atomic_Helper<asm, data_rc, addr_rc>,
- SIMCInstr<name, SISubtarget.VI>,
+ SIMCInstr<name, SIEncodingFamily.VI>,
MIMGe<op.VI> {
let isCodeGenOnly = 0;
let AssemblerPredicates = [isVI];
RegisterClass data_rc, RegisterClass addr_rc> {
let isPseudo = 1, isCodeGenOnly = 1 in {
def "" : MIMG_Atomic_Helper<asm, data_rc, addr_rc>,
- SIMCInstr<name, SISubtarget.NONE>;
+ SIMCInstr<name, SIEncodingFamily.NONE>;
}
let ssamp = 0 in {
let FilterClass = "SIMCInstr";
let RowFields = ["PseudoInstr"];
let ColFields = ["Subtarget"];
- let KeyCol = [!cast<string>(SISubtarget.NONE)];
- let ValueCols = [[!cast<string>(SISubtarget.SI)],[!cast<string>(SISubtarget.VI)]];
+ let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
+ let ValueCols = [[!cast<string>(SIEncodingFamily.SI)],
+ [!cast<string>(SIEncodingFamily.VI)]];
}
def getAddr64Inst : InstrMapping {
def INTERP : InterpSlots;
def isGCN : Predicate<"Subtarget->getGeneration() "
- ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">,
+ ">= SISubtarget::SOUTHERN_ISLANDS">,
AssemblerPredicate<"FeatureGCN">;
def isSI : Predicate<"Subtarget->getGeneration() "
- "== AMDGPUSubtarget::SOUTHERN_ISLANDS">,
+ "== SISubtarget::SOUTHERN_ISLANDS">,
AssemblerPredicate<"FeatureSouthernIslands">;
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
MachineRegisterInfo *MRI;
LiveIntervals *LIS;
-
static bool offsetsCanBeCombined(unsigned Offset0,
unsigned Offset1,
unsigned EltSize);
if (skipFunction(*MF.getFunction()))
return false;
- const TargetSubtargetInfo &STM = MF.getSubtarget();
- TRI = static_cast<const SIRegisterInfo *>(STM.getRegisterInfo());
- TII = static_cast<const SIInstrInfo *>(STM.getInstrInfo());
+ const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+ TII = STM.getInstrInfo();
+ TRI = &TII->getRegisterInfo();
+
MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();
}
bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
- TRI =
- static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ TII = ST.getInstrInfo();
+ TRI = &TII->getRegisterInfo();
+
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
bool HaveKill = false;
bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
+
std::vector<unsigned> I1Defs;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
WorkItemIDX(false),
WorkItemIDY(false),
WorkItemIDZ(false) {
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
const Function *F = MF.getFunction();
PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
// We don't need to worry about accessing spills with flat instructions.
// TODO: On VI where we must use flat for global, we should be able to omit
// this if it is never used for generic access.
- if (HasStackObjects && ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS &&
+ if (HasStackObjects && ST.getGeneration() >= SISubtarget::SEA_ISLANDS &&
ST.isAmdHsaOS())
FlatScratchInit = true;
return FlatScratchInitUserSGPR;
}
-SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
+SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
MachineFunction *MF,
unsigned FrameIndex,
unsigned SubIdx) {
if (!EnableSpillSGPRToVGPR)
return SpilledReg();
+ const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
MachineFrameInfo *FrameInfo = MF->getFrameInfo();
- const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
- MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
MachineRegisterInfo &MRI = MF->getRegInfo();
int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
Offset += SubIdx * 4;
//
//===----------------------------------------------------------------------===//
+#include "AMDGPU.h"
#include "SIMachineScheduler.h"
-#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "SIRegisterInfo.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
using namespace llvm;
static unsigned getMaxWaveCountPerSIMD(const MachineFunction &MF) {
- const SIMachineFunctionInfo& MFI = *MF.getInfo<SIMachineFunctionInfo>();
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
unsigned SIMDPerCU = 4;
unsigned MaxInvocationsPerWave = SIMDPerCU * ST.getWavefrontSize();
}
static unsigned getMaxWorkGroupSGPRCount(const MachineFunction &MF) {
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
unsigned MaxWaveCountPerSIMD = getMaxWaveCountPerSIMD(MF);
unsigned TotalSGPRCountPerSIMD, AddressableSGPRCount, SGPRUsageAlignment;
unsigned ReservedSGPRCount;
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
TotalSGPRCountPerSIMD = 800;
AddressableSGPRCount = 102;
SGPRUsageAlignment = 16;
MaxSGPRCount = alignDown(MaxSGPRCount, SGPRUsageAlignment);
if (ST.hasSGPRInitBug())
- MaxSGPRCount = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+ MaxSGPRCount = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
return std::min(MaxSGPRCount - ReservedSGPRCount, AddressableSGPRCount);
}
// Reserve registers for debugger usage if "amdgpu-debugger-reserve-trap-regs"
// attribute was specified.
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
if (ST.debuggerReserveRegs()) {
unsigned ReservedVGPRFirst =
MaxWorkGroupVGPRCount - MFI->getDebuggerReservedVGPRCount();
unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
unsigned Idx) const {
- const AMDGPUSubtarget &STI = MF.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget &STI = MF.getSubtarget<SISubtarget>();
// FIXME: We should adjust the max number of waves based on LDS size.
- unsigned SGPRLimit = getNumSGPRsAllowed(STI.getGeneration(),
- STI.getMaxWavesPerCU());
+ unsigned SGPRLimit = getNumSGPRsAllowed(STI, STI.getMaxWavesPerCU());
unsigned VGPRLimit = getNumVGPRsAllowed(STI.getMaxWavesPerCU());
unsigned VSLimit = SGPRLimit + VGPRLimit;
DL = Ins->getDebugLoc();
MachineFunction *MF = MBB->getParent();
- const AMDGPUSubtarget &Subtarget = MF->getSubtarget<AMDGPUSubtarget>();
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = Subtarget.getInstrInfo();
if (Offset == 0) {
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
MachineBasicBlock *MBB = MI.getParent();
MachineFunction *MF = MBB->getParent();
- const AMDGPUSubtarget &Subtarget = MF->getSubtarget<AMDGPUSubtarget>();
- const SIInstrInfo *TII
- = static_cast<const SIInstrInfo *>(Subtarget.getInstrInfo());
+ const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = Subtarget.getInstrInfo();
#ifndef NDEBUG
// FIXME: Is it possible to be storing a frame index to itself?
bool IsKill = SrcDst->isKill();
MachineBasicBlock *MBB = MI->getParent();
MachineFunction *MF = MI->getParent()->getParent();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
+ const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+
DebugLoc DL = MI->getDebugLoc();
- bool IsStore = TII->get(LoadStoreOp).mayStore();
+ bool IsStore = MI->mayStore();
bool RanOutOfSGPRs = false;
bool Scavenged = false;
MachineBasicBlock *MBB = MI->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo *FrameInfo = MF->getFrameInfo();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
+ const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
MachineOperand &FIOp = MI->getOperand(FIOperandNum);
}
}
-unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const {
- return getEncodingValue(Reg) & 0xff;
-}
-
// FIXME: This is very slow. It might be worth creating a map from physreg to
// register class.
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
enum PreloadedValue Value) const {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
(void)ST;
switch (Value) {
case SIRegisterInfo::WORKGROUP_ID_X:
}
}
-unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
+unsigned SIRegisterInfo::getNumSGPRsAllowed(const SISubtarget &ST,
unsigned WaveCount) const {
- if (gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
switch (WaveCount) {
case 10: return 80;
case 9: return 80;
//
//===----------------------------------------------------------------------===//
-
#ifndef LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
#include "AMDGPURegisterInfo.h"
-#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Debug.h"
namespace llvm {
+class SISubtarget;
+class MachineRegisterInfo;
+
struct SIRegisterInfo final : public AMDGPURegisterInfo {
private:
unsigned SGPR32SetID;
unsigned FIOperandNum,
RegScavenger *RS) const override;
- unsigned getHWRegIndex(unsigned Reg) const override;
+ unsigned getHWRegIndex(unsigned Reg) const {
+ return getEncodingValue(Reg) & 0xff;
+ }
/// \brief Return the 'base' register class for this register.
/// e.g. SGPR0 => SReg_32, VGPR => VGPR_32 SGPR0_SGPR1 -> SReg_32, etc.
/// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
/// concurrent waves.
- unsigned getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
- unsigned WaveCount) const;
+ unsigned getNumSGPRsAllowed(const SISubtarget &ST, unsigned WaveCount) const;
unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC) const;
return false;
MachineRegisterInfo &MRI = MF.getRegInfo();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
+
std::vector<unsigned> I1Defs;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
ExecExports.clear();
LiveMaskQueries.clear();
- TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
- TRI = static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+
+ TII = ST.getInstrInfo();
+ TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
char GlobalFlags = analyzeFunction(MF);