// instructions to run at the double precision rate for the device so it's
// probably best to just report no single precision denormals.
static uint32_t getFPMode(const MachineFunction &F) {
- const AMDGPUSubtarget& ST = F.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget& ST = F.getSubtarget<SISubtarget>();
// TODO: Is there any real use for the flush in only / flush out only modes?
uint32_t FP32Denormals =
void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
unsigned MaxGPR = 0;
bool killPixel = false;
- const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
- const R600RegisterInfo *RI =
- static_cast<const R600RegisterInfo *>(STM.getRegisterInfo());
+ const R600Subtarget &STM = MF.getSubtarget<R600Subtarget>();
+ const R600RegisterInfo *RI = STM.getRegisterInfo();
const R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
for (const MachineBasicBlock &MBB : MF) {
}
unsigned RsrcReg;
- if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) {
+ if (STM.getGeneration() >= R600Subtarget::EVERGREEN) {
// Evergreen / Northern Islands
switch (MF.getFunction()->getCallingConv()) {
default: // Fall through
void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
const MachineFunction &MF) const {
- const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
uint64_t CodeSize = 0;
unsigned MaxSGPR = 0;
unsigned MaxVGPR = 0;
bool VCCUsed = false;
bool FlatUsed = false;
- const SIRegisterInfo *RI =
- static_cast<const SIRegisterInfo *>(STM.getRegisterInfo());
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(STM.getInstrInfo());
+ const SIRegisterInfo *RI = STM.getRegisterInfo();
+ const SIInstrInfo *TII = STM.getInstrInfo();
for (const MachineBasicBlock &MBB : MF) {
for (const MachineInstr &MI : MBB) {
if (VCCUsed)
ExtraSGPRs = 2;
- if (STM.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (STM.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) {
if (FlatUsed)
ExtraSGPRs = 4;
} else {
ProgInfo.NumSGPR = MaxSGPR + 1;
if (STM.hasSGPRInitBug()) {
- if (ProgInfo.NumSGPR > AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG) {
+ if (ProgInfo.NumSGPR > SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG) {
LLVMContext &Ctx = MF.getFunction()->getContext();
DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
"SGPRs with SGPR init bug",
Ctx.diagnose(Diag);
}
- ProgInfo.NumSGPR = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+ ProgInfo.NumSGPR = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
}
if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) {
ProgInfo.CodeLen = CodeSize;
unsigned LDSAlignShift;
- if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
+ if (STM.getGeneration() < SISubtarget::SEA_ISLANDS) {
// LDS is allocated in 64 dword blocks.
LDSAlignShift = 8;
} else {
void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
const SIProgramInfo &KernelInfo) {
- const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned RsrcReg = getRsrcReg(MF.getFunction()->getCallingConv());
void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
const SIProgramInfo &KernelInfo) const {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
amd_kernel_code_t header;
AMDGPU::initDefaultAMDKernelCodeT(header, STM.getFeatureBits());
//===----------------------------------------------------------------------===//
#include "AMDGPUFrameLowering.h"
#include "AMDGPURegisterInfo.h"
-#include "R600MachineFunctionInfo.h"
+#include "AMDGPUSubtarget.h"
+
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Instructions.h"
int FI,
unsigned &FrameReg) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+ const AMDGPURegisterInfo *RI
+ = MF.getSubtarget<AMDGPUSubtarget>().getRegisterInfo();
// Fill in FrameReg output argument.
FrameReg = RI->getFrameRegister(MF);
return OffsetBytes / (getStackWidth(MF) * 4);
}
-const TargetFrameLowering::SpillSlot *
-AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
- NumEntries = 0;
- return nullptr;
-}
-void AMDGPUFrameLowering::emitPrologue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {}
-void
-AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
-}
-
-bool
-AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
- return false;
-}
/// \returns The number of 32-bit sub-registers that are used when storing
/// values to the stack.
unsigned getStackWidth(const MachineFunction &MF) const;
+
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const override;
- const SpillSlot *
- getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
- void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- bool hasFP(const MachineFunction &MF) const override;
+
+ const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const override {
+ NumEntries = 0;
+ return nullptr;
+ }
+
+ bool hasFP(const MachineFunction &MF) const override {
+ return false;
+ }
};
} // namespace llvm
#endif
: SelectionDAGISel(TM) {}
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
+ Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
}
-AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
+AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
const AMDGPUSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
// Lower floating point store/load to integer store/load to reduce the number
const SmallVectorImpl<ISD::OutputArg> &Outs) const;
public:
- AMDGPUTargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI);
+ AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
bool isFAbsFree(EVT VT) const override;
bool isFNegFree(EVT VT) const override;
// Pin the vtable to this file.
void AMDGPUInstrInfo::anchor() {}
-AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUGenInstrInfo(-1, -1), ST(st) {}
-
-const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
- return RI;
-}
+AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
+ : AMDGPUGenInstrInfo(-1, -1), ST(ST) {}
bool AMDGPUInstrInfo::enableClusterLoads() const {
return true;
return -1;
}
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUFrameLowering *TFL = ST.getFrameLowering();
+
unsigned IgnoredFrameReg;
- Offset = MF.getSubtarget().getFrameLowering()->getFrameIndexReference(
- MF, -1, IgnoredFrameReg);
+ Offset = TFL->getFrameIndexReference(MF, -1, IgnoredFrameReg);
return getIndirectIndexBegin(MF) + Offset;
}
}
}
+// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
+enum SIEncodingFamily {
+ SI = 0,
+ VI = 1
+};
+
// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
namespace llvm {
namespace AMDGPU {
static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
- return getMCOpcodeGen(Opcode, (enum Subtarget)Gen);
+ return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}
}
}
-// This must be kept in sync with the SISubtarget class in SIInstrInfo.td
-enum SISubtarget {
- SI = 0,
- VI = 1
-};
-
-static enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) {
- switch (Gen) {
- default:
- return SI;
+static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
+ switch (ST.getGeneration()) {
+ case AMDGPUSubtarget::SOUTHERN_ISLANDS:
+ case AMDGPUSubtarget::SEA_ISLANDS:
+ return SIEncodingFamily::SI;
case AMDGPUSubtarget::VOLCANIC_ISLANDS:
- return VI;
+ return SIEncodingFamily::VI;
+
+ // FIXME: This should never be called for r600 GPUs.
+ case AMDGPUSubtarget::R600:
+ case AMDGPUSubtarget::R700:
+ case AMDGPUSubtarget::EVERGREEN:
+ case AMDGPUSubtarget::NORTHERN_ISLANDS:
+ return SIEncodingFamily::SI;
}
}
int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
- int MCOp = AMDGPU::getMCOpcode(
- Opcode, AMDGPUSubtargetToSISubtarget(ST.getGeneration()));
+ int MCOp = AMDGPU::getMCOpcode(Opcode, subtargetEncodingFamily(ST));
// -1 means that Opcode is already a native instruction.
if (MCOp == -1)
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
-#include "AMDGPURegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
private:
- const AMDGPURegisterInfo RI;
- virtual void anchor();
-protected:
const AMDGPUSubtarget &ST;
-public:
- explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
- virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
+ virtual void anchor();
public:
+ explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
+
/// \returns the smallest register index that will be accessed by an indirect
/// read or write or -1 if indirect addressing is not used by this program.
int getIndirectIndexBegin(const MachineFunction &MF) const;
/// \brief Given a MIMG \p Opcode that writes all 4 channels, return the
/// equivalent opcode that writes \p Channels Channels.
int getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const;
-
};
namespace AMDGPU {
#include "AMDGPUMCInstLower.h"
#include "AMDGPUAsmPrinter.h"
+#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "InstPrinter/AMDGPUInstPrinter.h"
#include "SIInstrInfo.h"
using namespace llvm;
AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st):
- Ctx(ctx), ST(st)
-{ }
+ Ctx(ctx), ST(st) { }
void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
raw_string_ostream DisasmStream(DisasmLine);
AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(),
- *MF->getSubtarget().getInstrInfo(),
- *MF->getSubtarget().getRegisterInfo());
- InstPrinter.printInst(&TmpInst, DisasmStream, StringRef(),
- MF->getSubtarget());
+ *STI.getInstrInfo(),
+ *STI.getRegisterInfo());
+ InstPrinter.printInst(&TmpInst, DisasmStream, StringRef(), STI);
// Disassemble instruction/operands to hex representation.
SmallVector<MCFixup, 4> Fixups;
// they are not supported at this time.
//===----------------------------------------------------------------------===//
-const MCPhysReg AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister;
+// Dummy to not crash RegisterClassInfo.
+static const MCPhysReg CalleeSavedReg = AMDGPU::NoRegister;
-const MCPhysReg*
-AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+const MCPhysReg *AMDGPURegisterInfo::getCalleeSavedRegs(
+ const MachineFunction *) const {
return &CalleeSavedReg;
}
}
unsigned AMDGPURegisterInfo::getIndirectSubReg(unsigned IndirectIndex) const {
-
return getSubRegFromChannel(IndirectIndex);
}
class TargetInstrInfo;
struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
- static const MCPhysReg CalleeSavedReg;
-
AMDGPURegisterInfo();
- BitVector getReservedRegs(const MachineFunction &MF) const override {
- assert(!"Unimplemented"); return BitVector();
- }
-
- virtual unsigned getHWRegIndex(unsigned Reg) const {
- assert(!"Unimplemented"); return 0;
- }
-
/// \returns the sub reg enum value for the given \p Channel
/// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
unsigned getSubRegFromChannel(unsigned Channel) const;
unsigned getFrameRegister(const MachineFunction &MF) const override;
unsigned getIndirectSubReg(unsigned IndirectIndex) const;
-
};
} // End namespace llvm
} // End anonymous namespace.
#endif
+AMDGPUSubtarget::~AMDGPUSubtarget() {}
+
AMDGPUSubtarget &
AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS) {
}
AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
- TargetMachine &TM)
- : AMDGPUGenSubtargetInfo(TT, GPU, FS),
- DumpCode(false), R600ALUInst(false), HasVertexCache(false),
- TexVTXClauseSize(0),
- Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
- FP64(false),
- FP64Denormals(false), FP32Denormals(false), FPExceptions(false),
- FastFMAF32(false), HalfRate64Ops(false), CaymanISA(false),
- FlatAddressSpace(false), FlatForGlobal(false), EnableIRStructurizer(true),
- EnablePromoteAlloca(false),
- EnableIfCvt(true), EnableLoadStoreOpt(false),
- EnableUnsafeDSOffsetFolding(false),
- EnableXNACK(false),
- WavefrontSize(64), CFALUBug(false),
- LocalMemorySize(0), MaxPrivateElementSize(0),
- EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
- GCN1Encoding(false), GCN3Encoding(false), CIInsts(false),
- HasSMemRealTime(false), Has16BitInsts(false),
- LDSBankCount(0),
- IsaVersion(ISAVersion0_0_0),
- EnableSIScheduler(false),
- DebuggerInsertNops(false), DebuggerReserveRegs(false),
- FrameLowering(nullptr),
- GISel(),
- InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) {
-
+ const TargetMachine &TM)
+ : AMDGPUGenSubtargetInfo(TT, GPU, FS),
+ TargetTriple(TT),
+ Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
+ IsaVersion(ISAVersion0_0_0),
+ WavefrontSize(64),
+ LocalMemorySize(0),
+ LDSBankCount(0),
+ MaxPrivateElementSize(0),
+
+ FastFMAF32(false),
+ HalfRate64Ops(false),
+
+ FP32Denormals(false),
+ FP64Denormals(false),
+ FPExceptions(false),
+ FlatForGlobal(false),
+ EnableXNACK(false),
+ DebuggerInsertNops(false),
+ DebuggerReserveRegs(false),
+
+ EnableVGPRSpilling(false),
+ EnableIRStructurizer(true),
+ EnablePromoteAlloca(false),
+ EnableIfCvt(true),
+ EnableLoadStoreOpt(false),
+ EnableUnsafeDSOffsetFolding(false),
+ EnableSIScheduler(false),
+ DumpCode(false),
+
+ FP64(false),
+ IsGCN(false),
+ GCN1Encoding(false),
+ GCN3Encoding(false),
+ CIInsts(false),
+ SGPRInitBug(false),
+ HasSMemRealTime(false),
+ Has16BitInsts(false),
+ FlatAddressSpace(false),
+
+ R600ALUInst(false),
+ CaymanISA(false),
+ CFALUBug(false),
+ HasVertexCache(false),
+ TexVTXClauseSize(0),
+
+ FeatureDisable(false),
+
+ InstrItins(getInstrItineraryForCPU(GPU)) {
initializeSubtargetDependencies(TT, GPU, FS);
-
- // Scratch is allocated in 256 dword per wave blocks.
- const unsigned StackAlign = 4 * 256 / getWavefrontSize();
-
- if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- InstrInfo.reset(new R600InstrInfo(*this));
- TLInfo.reset(new R600TargetLowering(TM, *this));
-
- // FIXME: Should have R600 specific FrameLowering
- FrameLowering.reset(new AMDGPUFrameLowering(
- TargetFrameLowering::StackGrowsUp,
- StackAlign,
- 0));
- } else {
- InstrInfo.reset(new SIInstrInfo(*this));
- TLInfo.reset(new SITargetLowering(TM, *this));
- FrameLowering.reset(new SIFrameLowering(
- TargetFrameLowering::StackGrowsUp,
- StackAlign,
- 0));
-#ifndef LLVM_BUILD_GLOBAL_ISEL
- GISelAccessor *GISel = new GISelAccessor();
-#else
- AMDGPUGISelActualAccessor *GISel =
- new AMDGPUGISelActualAccessor();
- GISel->CallLoweringInfo.reset(
- new AMDGPUCallLowering(*getTargetLowering()));
-#endif
- setGISelAccessor(*GISel);
- }
-}
-
-const CallLowering *AMDGPUSubtarget::getCallLowering() const {
- assert(GISel && "Access to GlobalISel APIs not set");
- return GISel->getCallLowering();
-}
-
-unsigned AMDGPUSubtarget::getStackEntrySize() const {
- assert(getGeneration() <= NORTHERN_ISLANDS);
- switch(getWavefrontSize()) {
- case 16:
- return 8;
- case 32:
- return hasCaymanISA() ? 4 : 8;
- case 64:
- return 4;
- default:
- llvm_unreachable("Illegal wavefront size.");
- }
}
// FIXME: These limits are for SI. Did they change with the larger maximum LDS
return 1;
}
-unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const {
- switch(getGeneration()) {
- default: llvm_unreachable("ChipID unknown");
- case SEA_ISLANDS: return 12;
- }
-}
-
-AMDGPU::IsaVersion AMDGPUSubtarget::getIsaVersion() const {
- return AMDGPU::getIsaVersion(getFeatureBits());
+R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
+ const TargetMachine &TM) :
+ AMDGPUSubtarget(TT, GPU, FS, TM),
+ InstrInfo(*this),
+ FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
+ TLInfo(TM, *this) {}
+
+SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
+ const TargetMachine &TM) :
+ AMDGPUSubtarget(TT, GPU, FS, TM),
+ InstrInfo(*this),
+ FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
+ TLInfo(TM, *this) {
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+ GISelAccessor *GISel = new GISelAccessor();
+#else
+ AMDGPUGISelActualAccessor *GISel =
+ new AMDGPUGISelActualAccessor();
+ GISel->CallLoweringInfo.reset(
+ new AMDGPUCallLowering(*getTargetLowering()));
+#endif
+ setGISelAccessor(*GISel);
}
-bool AMDGPUSubtarget::isVGPRSpillingEnabled(const Function& F) const {
- return !AMDGPU::isShader(F.getCallingConv()) || EnableVGPRSpilling;
+unsigned R600Subtarget::getStackEntrySize() const {
+ switch (getWavefrontSize()) {
+ case 16:
+ return 8;
+ case 32:
+ return hasCaymanISA() ? 4 : 8;
+ case 64:
+ return 4;
+ default:
+ llvm_unreachable("Illegal wavefront size.");
+ }
}
-void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
+void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
MachineInstr *begin,
MachineInstr *end,
unsigned NumRegionInstrs) const {
- if (getGeneration() >= SOUTHERN_ISLANDS) {
-
- // Track register pressure so the scheduler can try to decrease
- // pressure once register usage is above the threshold defined by
- // SIRegisterInfo::getRegPressureSetLimit()
- Policy.ShouldTrackPressure = true;
+ // Track register pressure so the scheduler can try to decrease
+ // pressure once register usage is above the threshold defined by
+ // SIRegisterInfo::getRegPressureSetLimit()
+ Policy.ShouldTrackPressure = true;
+
+ // Enabling both top down and bottom up scheduling seems to give us less
+ // register spills than just using one of these approaches on its own.
+ Policy.OnlyTopDown = false;
+ Policy.OnlyBottomUp = false;
+
+ // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
+ if (!enableSIScheduler())
+ Policy.ShouldTrackLaneMasks = true;
+}
- // Enabling both top down and bottom up scheduling seems to give us less
- // register spills than just using one of these approaches on its own.
- Policy.OnlyTopDown = false;
- Policy.OnlyBottomUp = false;
+bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
+ return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
+}
- // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
- if (!enableSIScheduler())
- Policy.ShouldTrackLaneMasks = true;
+unsigned SISubtarget::getAmdKernelCodeChipID() const {
+ switch (getGeneration()) {
+ case SEA_ISLANDS:
+ return 12;
+ default:
+ llvm_unreachable("ChipID unknown");
}
}
+AMDGPU::IsaVersion SISubtarget::getIsaVersion() const {
+ return AMDGPU::getIsaVersion(getFeatureBits());
+}
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
#include "AMDGPU.h"
-#include "AMDGPUFrameLowering.h"
-#include "AMDGPUISelLowering.h"
-#include "AMDGPUInstrInfo.h"
-#include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
+#include "R600ISelLowering.h"
+#include "R600FrameLowering.h"
+#include "SIInstrInfo.h"
+#include "SIISelLowering.h"
+#include "SIFrameLowering.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/Target/TargetSubtargetInfo.h"
namespace llvm {
-class StringRef;
class SIMachineFunctionInfo;
+class StringRef;
class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
-
public:
enum Generation {
R600 = 0,
};
enum {
- FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
- };
-
- enum {
ISAVersion0_0_0,
ISAVersion7_0_0,
ISAVersion7_0_1,
ISAVersion8_0_3
};
-private:
- bool DumpCode;
- bool R600ALUInst;
- bool HasVertexCache;
- short TexVTXClauseSize;
+protected:
+ // Basic subtarget description.
+ Triple TargetTriple;
Generation Gen;
- bool FP64;
- bool FP64Denormals;
- bool FP32Denormals;
- bool FPExceptions;
+ unsigned IsaVersion;
+ unsigned WavefrontSize;
+ int LocalMemorySize;
+ int LDSBankCount;
+ unsigned MaxPrivateElementSize;
+
+ // Possibly statically set by tablegen, but may want to be overridden.
bool FastFMAF32;
bool HalfRate64Ops;
- bool CaymanISA;
- bool FlatAddressSpace;
+
+ // Dynamially set bits that enable features.
+ bool FP32Denormals;
+ bool FP64Denormals;
+ bool FPExceptions;
bool FlatForGlobal;
+ bool EnableXNACK;
+ bool DebuggerInsertNops;
+ bool DebuggerReserveRegs;
+
+ // Used as options.
+ bool EnableVGPRSpilling;
bool EnableIRStructurizer;
bool EnablePromoteAlloca;
bool EnableIfCvt;
bool EnableLoadStoreOpt;
bool EnableUnsafeDSOffsetFolding;
- bool EnableXNACK;
- unsigned WavefrontSize;
- bool CFALUBug;
- int LocalMemorySize;
- unsigned MaxPrivateElementSize;
- bool EnableVGPRSpilling;
- bool SGPRInitBug;
+ bool EnableSIScheduler;
+ bool DumpCode;
+
+ // Subtarget statically properties set by tablegen
+ bool FP64;
bool IsGCN;
bool GCN1Encoding;
bool GCN3Encoding;
bool CIInsts;
+ bool SGPRInitBug;
bool HasSMemRealTime;
bool Has16BitInsts;
+ bool FlatAddressSpace;
+ bool R600ALUInst;
+ bool CaymanISA;
+ bool CFALUBug;
+ bool HasVertexCache;
+ short TexVTXClauseSize;
+
+ // Dummy feature to use for assembler in tablegen.
bool FeatureDisable;
- int LDSBankCount;
- unsigned IsaVersion;
- bool EnableSIScheduler;
- bool DebuggerInsertNops;
- bool DebuggerReserveRegs;
- std::unique_ptr<AMDGPUFrameLowering> FrameLowering;
- std::unique_ptr<AMDGPUTargetLowering> TLInfo;
- std::unique_ptr<AMDGPUInstrInfo> InstrInfo;
- std::unique_ptr<GISelAccessor> GISel;
InstrItineraryData InstrItins;
- Triple TargetTriple;
public:
- AMDGPUSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
- TargetMachine &TM);
+ AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
+ const TargetMachine &TM);
+ virtual ~AMDGPUSubtarget();
AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS);
- void setGISelAccessor(GISelAccessor &GISel) {
- this->GISel.reset(&GISel);
- }
+ const AMDGPUInstrInfo *getInstrInfo() const override;
+ const AMDGPUFrameLowering *getFrameLowering() const override;
+ const AMDGPUTargetLowering *getTargetLowering() const override;
+ const AMDGPURegisterInfo *getRegisterInfo() const override;
- const AMDGPUFrameLowering *getFrameLowering() const override {
- return FrameLowering.get();
- }
- const AMDGPUInstrInfo *getInstrInfo() const override {
- return InstrInfo.get();
- }
- const AMDGPURegisterInfo *getRegisterInfo() const override {
- return &InstrInfo->getRegisterInfo();
- }
- AMDGPUTargetLowering *getTargetLowering() const override {
- return TLInfo.get();
- }
const InstrItineraryData *getInstrItineraryData() const override {
return &InstrItins;
}
- const CallLowering *getCallLowering() const override;
-
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- bool hasVertexCache() const {
- return HasVertexCache;
- }
-
- short getTexVTXClauseSize() const {
- return TexVTXClauseSize;
+ bool isAmdHsaOS() const {
+ return TargetTriple.getOS() == Triple::AMDHSA;
}
Generation getGeneration() const {
return Gen;
}
- bool hasHWFP64() const {
- return FP64;
+ unsigned getWavefrontSize() const {
+ return WavefrontSize;
}
- bool hasCaymanISA() const {
- return CaymanISA;
+ int getLocalMemorySize() const {
+ return LocalMemorySize;
}
- bool hasFP32Denormals() const {
- return FP32Denormals;
+ int getLDSBankCount() const {
+ return LDSBankCount;
}
- bool hasFP64Denormals() const {
- return FP64Denormals;
+ unsigned getMaxPrivateElementSize() const {
+ return MaxPrivateElementSize;
}
- bool hasFPExceptions() const {
- return FPExceptions;
+ bool hasHWFP64() const {
+ return FP64;
}
bool hasFastFMAF32() const {
return HalfRate64Ops;
}
- bool hasFlatAddressSpace() const {
- return FlatAddressSpace;
- }
-
- bool hasSMemRealTime() const {
- return HasSMemRealTime;
- }
-
- bool has16BitInsts() const {
- return Has16BitInsts;
- }
-
- bool useFlatForGlobal() const {
- return FlatForGlobal;
- }
-
bool hasAddr64() const {
return (getGeneration() < VOLCANIC_ISLANDS);
}
return (getGeneration() >= EVERGREEN);
}
+ bool hasCaymanISA() const {
+ return CaymanISA;
+ }
+
bool IsIRStructurizerEnabled() const {
return EnableIRStructurizer;
}
return EnableIfCvt;
}
- bool loadStoreOptEnabled() const {
- return EnableLoadStoreOpt;
- }
-
bool unsafeDSOffsetFoldingEnabled() const {
return EnableUnsafeDSOffsetFolding;
}
- unsigned getWavefrontSize() const {
- return WavefrontSize;
- }
-
- unsigned getStackEntrySize() const;
-
- bool hasCFAluBug() const {
- assert(getGeneration() <= NORTHERN_ISLANDS);
- return CFALUBug;
+ bool dumpCode() const {
+ return DumpCode;
}
/// Return the amount of LDS that can be used that will not restrict the
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const;
- int getLocalMemorySize() const {
- return LocalMemorySize;
+ bool hasFP32Denormals() const {
+ return FP32Denormals;
}
- unsigned getMaxPrivateElementSize() const {
- return MaxPrivateElementSize;
+ bool hasFP64Denormals() const {
+ return FP64Denormals;
}
- bool hasSGPRInitBug() const {
- return SGPRInitBug;
+ bool hasFPExceptions() const {
+ return FPExceptions;
}
- int getLDSBankCount() const {
- return LDSBankCount;
+ bool useFlatForGlobal() const {
+ return FlatForGlobal;
}
- unsigned getAmdKernelCodeChipID() const;
+ bool isXNACKEnabled() const {
+ return EnableXNACK;
+ }
- AMDGPU::IsaVersion getIsaVersion() const;
+ unsigned getMaxWavesPerCU() const {
+ if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ return 10;
+
+ // FIXME: Not sure what this is for other subtagets.
+ return 8;
+ }
+
+ /// \brief Returns the offset in bytes from the start of the input buffer
+ /// of the first explicit kernel argument.
+ unsigned getExplicitKernelArgOffset() const {
+ return isAmdHsaOS() ? 0 : 36;
+ }
+
+ unsigned getStackAlignment() const {
+ // Scratch is allocated in 256 dword per wave blocks.
+ return 4 * 256 / getWavefrontSize();
+ }
bool enableMachineScheduler() const override {
return true;
}
- void overrideSchedPolicy(MachineSchedPolicy &Policy,
- MachineInstr *begin, MachineInstr *end,
- unsigned NumRegionInstrs) const override;
+ bool enableSubRegLiveness() const override {
+ return true;
+ }
+};
- // Helper functions to simplify if statements
- bool isTargetELF() const {
- return false;
+class R600Subtarget final : public AMDGPUSubtarget {
+private:
+ R600InstrInfo InstrInfo;
+ R600FrameLowering FrameLowering;
+ R600TargetLowering TLInfo;
+
+public:
+ R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
+ const TargetMachine &TM);
+
+ const R600InstrInfo *getInstrInfo() const override {
+ return &InstrInfo;
}
- bool enableSIScheduler() const {
- return EnableSIScheduler;
+ const R600FrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
}
- bool debuggerInsertNops() const {
- return DebuggerInsertNops;
+ const R600TargetLowering *getTargetLowering() const override {
+ return &TLInfo;
}
- bool debuggerReserveRegs() const {
- return DebuggerReserveRegs;
+ const R600RegisterInfo *getRegisterInfo() const override {
+ return &InstrInfo.getRegisterInfo();
}
- bool dumpCode() const {
- return DumpCode;
+ bool hasCFAluBug() const {
+ return CFALUBug;
}
- bool r600ALUEncoding() const {
- return R600ALUInst;
+
+ bool hasVertexCache() const {
+ return HasVertexCache;
}
- bool isAmdHsaOS() const {
- return TargetTriple.getOS() == Triple::AMDHSA;
+
+ short getTexVTXClauseSize() const {
+ return TexVTXClauseSize;
}
- bool isVGPRSpillingEnabled(const Function& F) const;
- bool isXNACKEnabled() const {
- return EnableXNACK;
+ unsigned getStackEntrySize() const;
+};
+
+class SISubtarget final : public AMDGPUSubtarget {
+public:
+ enum {
+ FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
+ };
+
+private:
+ SIInstrInfo InstrInfo;
+ SIFrameLowering FrameLowering;
+ SITargetLowering TLInfo;
+ std::unique_ptr<GISelAccessor> GISel;
+
+public:
+ SISubtarget(const Triple &TT, StringRef CPU, StringRef FS,
+ const TargetMachine &TM);
+
+ const SIInstrInfo *getInstrInfo() const override {
+ return &InstrInfo;
}
- unsigned getMaxWavesPerCU() const {
- if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
- return 10;
+ const SIFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
- // FIXME: Not sure what this is for other subtagets.
- return 8;
+ const SITargetLowering *getTargetLowering() const override {
+ return &TLInfo;
}
- bool enableSubRegLiveness() const override {
- return true;
+ const CallLowering *getCallLowering() const override {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getCallLowering();
}
- /// \brief Returns the offset in bytes from the start of the input buffer
- /// of the first explicit kernel argument.
- unsigned getExplicitKernelArgOffset() const {
- return isAmdHsaOS() ? 0 : 36;
+ const SIRegisterInfo *getRegisterInfo() const override {
+ return &InstrInfo.getRegisterInfo();
}
+ void setGISelAccessor(GISelAccessor &GISel) {
+ this->GISel.reset(&GISel);
+ }
+
+ void overrideSchedPolicy(MachineSchedPolicy &Policy,
+ MachineInstr *Begin, MachineInstr *End,
+ unsigned NumRegionInstrs) const override;
+
+ bool isVGPRSpillingEnabled(const Function& F) const;
+
+ unsigned getAmdKernelCodeChipID() const;
+
+ AMDGPU::IsaVersion getIsaVersion() const;
+
unsigned getMaxNumUserSGPRs() const {
return 16;
}
+
+ bool hasFlatAddressSpace() const {
+ return FlatAddressSpace;
+ }
+
+ bool hasSMemRealTime() const {
+ return HasSMemRealTime;
+ }
+
+ bool has16BitInsts() const {
+ return Has16BitInsts;
+ }
+
+ bool enableSIScheduler() const {
+ return EnableSIScheduler;
+ }
+
+ bool debuggerInsertNops() const {
+ return DebuggerInsertNops;
+ }
+
+ bool debuggerReserveRegs() const {
+ return DebuggerReserveRegs;
+ }
+
+ bool loadStoreOptEnabled() const {
+ return EnableLoadStoreOpt;
+ }
+
+ bool hasSGPRInitBug() const {
+ return SGPRInitBug;
+ }
};
+
+inline const AMDGPUInstrInfo *AMDGPUSubtarget::getInstrInfo() const {
+ if (getGeneration() >= SOUTHERN_ISLANDS)
+ return static_cast<const SISubtarget *>(this)->getInstrInfo();
+
+ return static_cast<const R600Subtarget *>(this)->getInstrInfo();
+}
+
+inline const AMDGPUFrameLowering *AMDGPUSubtarget::getFrameLowering() const {
+ if (getGeneration() >= SOUTHERN_ISLANDS)
+ return static_cast<const SISubtarget *>(this)->getFrameLowering();
+
+ return static_cast<const R600Subtarget *>(this)->getFrameLowering();
+}
+
+inline const AMDGPUTargetLowering *AMDGPUSubtarget::getTargetLowering() const {
+ if (getGeneration() >= SOUTHERN_ISLANDS)
+ return static_cast<const SISubtarget *>(this)->getTargetLowering();
+
+ return static_cast<const R600Subtarget *>(this)->getTargetLowering();
+}
+
+inline const AMDGPURegisterInfo *AMDGPUSubtarget::getRegisterInfo() const {
+ if (getGeneration() >= SOUTHERN_ISLANDS)
+ return static_cast<const SISubtarget *>(this)->getRegisterInfo();
+
+ return static_cast<const R600Subtarget *>(this)->getRegisterInfo();
+}
+
} // End namespace llvm
#endif
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
-#include "llvm/CodeGen/Passes.h"
using namespace llvm;
Optional<Reloc::Model> RM,
CodeModel::Model CM,
CodeGenOpt::Level OptLevel)
- : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
- FS, Options, getEffectiveRelocModel(RM), CM, OptLevel),
- TLOF(createTLOF(getTargetTriple())),
- Subtarget(TT, getTargetCPU(), FS, *this), IntrinsicInfo() {
+ : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
+ FS, Options, getEffectiveRelocModel(RM), CM, OptLevel),
+ TLOF(createTLOF(getTargetTriple())),
+ IntrinsicInfo() {
setRequiresStructuredCFG(true);
initAsmInfo();
}
TargetOptions Options,
Optional<Reloc::Model> RM,
CodeModel::Model CM, CodeGenOpt::Level OL)
- : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
+ : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, getTargetCPU(), FS, *this) {}
//===----------------------------------------------------------------------===//
// GCN Target Machine (SI+)
TargetOptions Options,
Optional<Reloc::Model> RM,
CodeModel::Model CM, CodeGenOpt::Level OL)
- : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
+ : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, getTargetCPU(), FS, *this) {}
//===----------------------------------------------------------------------===//
// AMDGPU Pass Setup
return getTM<AMDGPUTargetMachine>();
}
- ScheduleDAGInstrs *
- createMachineScheduler(MachineSchedContext *C) const override {
- const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
- return createR600MachineScheduler(C);
- else if (ST.enableSIScheduler())
- return createSIMachineScheduler(C);
- return nullptr;
- }
-
void addEarlyCSEOrGVNPass();
void addStraightLineScalarOptimizationPasses();
void addIRPasses() override;
R600PassConfig(TargetMachine *TM, PassManagerBase &PM)
: AMDGPUPassConfig(TM, PM) { }
+  // R600 targets unconditionally use the custom R600 machine scheduler.
+  ScheduleDAGInstrs *createMachineScheduler(
+    MachineSchedContext *C) const override {
+    return createR600MachineScheduler(C);
+  }
+
bool addPreISel() override;
void addPreRegAlloc() override;
void addPreSched2() override;
public:
GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
: AMDGPUPassConfig(TM, PM) { }
+
+  // Convenience downcast; a GCNPassConfig is only created for a
+  // GCNTargetMachine.
+  GCNTargetMachine &getGCNTargetMachine() const {
+    return getTM<GCNTargetMachine>();
+  }
+
+  // Use the SI machine scheduler when the subtarget enables it; returning
+  // nullptr selects the default machine scheduler.
+  ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const override {
+    const SISubtarget *ST = getGCNTargetMachine().getSubtargetImpl();
+    if (ST->enableSIScheduler())
+      return createSIMachineScheduler(C);
+    return nullptr;
+  }
+
bool addPreISel() override;
void addMachineSSAOptimization() override;
bool addInstSelector() override;
addEarlyCSEOrGVNPass();
}
-bool
-AMDGPUPassConfig::addPreISel() {
+bool AMDGPUPassConfig::addPreISel() {
addPass(createFlattenCFGPass());
return false;
}
#endif
void GCNPassConfig::addPreRegAlloc() {
- const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
+ const SISubtarget &ST = *getGCNTargetMachine().getSubtargetImpl();
// This needs to be run directly before register allocation because
// earlier passes might recompute live intervals.
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H
-#include "AMDGPUFrameLowering.h"
-#include "AMDGPUInstrInfo.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
-#include "R600ISelLowering.h"
-#include "llvm/IR/DataLayout.h"
namespace llvm {
//===----------------------------------------------------------------------===//
class AMDGPUTargetMachine : public LLVMTargetMachine {
-private:
-
protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- AMDGPUSubtarget Subtarget;
AMDGPUIntrinsicInfo IntrinsicInfo;
public:
CodeGenOpt::Level OL);
~AMDGPUTargetMachine();
- const AMDGPUSubtarget *getSubtargetImpl() const { return &Subtarget; }
- const AMDGPUSubtarget *getSubtargetImpl(const Function &) const override {
- return &Subtarget;
- }
+ const AMDGPUSubtarget *getSubtargetImpl() const;
+ const AMDGPUSubtarget *getSubtargetImpl(const Function &) const override;
+
const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
return &IntrinsicInfo;
}
//===----------------------------------------------------------------------===//
class R600TargetMachine final : public AMDGPUTargetMachine {
+private:
+ R600Subtarget Subtarget;
public:
R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
CodeGenOpt::Level OL);
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
+  // One statically constructed subtarget per target machine; the Function
+  // overload ignores its argument and returns that same subtarget.
+  const R600Subtarget *getSubtargetImpl() const {
+    return &Subtarget;
+  }
+
+  const R600Subtarget *getSubtargetImpl(const Function &) const override {
+    return &Subtarget;
+  }
};
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
class GCNTargetMachine final : public AMDGPUTargetMachine {
+private:
+ SISubtarget Subtarget;
public:
GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
CodeGenOpt::Level OL);
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
+  // One statically constructed subtarget per target machine; the Function
+  // overload ignores its argument and returns that same subtarget.
+  const SISubtarget *getSubtargetImpl() const {
+    return &Subtarget;
+  }
+
+  const SISubtarget *getSubtargetImpl(const Function &) const override {
+    return &Subtarget;
+  }
};
+// Defined after both final subclasses so they are complete types here. The
+// triple's architecture (amdgcn vs. r600) determines which subclass this
+// really is, making the static_cast safe.
+inline const AMDGPUSubtarget *AMDGPUTargetMachine::getSubtargetImpl() const {
+  if (getTargetTriple().getArch() == Triple::amdgcn)
+    return static_cast<const GCNTargetMachine *>(this)->getSubtargetImpl();
+  return static_cast<const R600TargetMachine *>(this)->getSubtargetImpl();
+}
+
+inline const AMDGPUSubtarget *AMDGPUTargetMachine::getSubtargetImpl(
+    const Function &F) const {
+  if (getTargetTriple().getArch() == Triple::amdgcn)
+    return static_cast<const GCNTargetMachine *>(this)->getSubtargetImpl(F);
+  return static_cast<const R600TargetMachine *>(this)->getSubtargetImpl(F);
+}
+
} // End namespace llvm
#endif
bool prepare();
bool runOnMachineFunction(MachineFunction &MF) override {
- TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ TII = MF.getSubtarget<R600Subtarget>().getInstrInfo();
TRI = &TII->getRegisterInfo();
DEBUG(MF.dump(););
OrderedBlks.clear();
R600ControlFlowFinalizer.cpp
R600EmitClauseMarkers.cpp
R600ExpandSpecialInstrs.cpp
+ R600FrameLowering.cpp
R600InstrInfo.cpp
R600ISelLowering.cpp
R600MachineFunctionInfo.cpp
GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
  CurrCycleInstr(nullptr),
-  MF(MF) {
+  MF(MF),
+  // Cache the subtarget once so the per-instruction hazard checks can use
+  // the ST member instead of refetching it from MF on every query.
+  ST(MF.getSubtarget<SISubtarget>()) {
  MaxLookAhead = 5;
}
if (!CurrCycleInstr)
return;
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(MF.getSubtarget().getInstrInfo());
+ const SIInstrInfo *TII = ST.getInstrInfo();
unsigned NumWaitStates = TII->getNumWaitStates(*CurrCycleInstr);
// Keep track of emitted instructions
int GCNHazardRecognizer::getWaitStatesSinceDef(
unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef) {
- const TargetRegisterInfo *TRI =
- MF.getSubtarget<AMDGPUSubtarget>().getRegisterInfo();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
int WaitStates = -1;
for (MachineInstr *MI : EmittedInstrs) {
}
int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) {
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
-
// SMEM soft clause are only present on VI+
- if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
return 0;
// A soft-clause is any group of consecutive SMEM instructions. The
}
int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
-  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
-  const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
+  // Use the SISubtarget cached in the ST member (initialized in the
+  // constructor) rather than refetching it from MF, matching the other
+  // hazard checks (checkVMEMHazards, checkDPPHazards) in this change.
+  const SIInstrInfo *TII = ST.getInstrInfo();
  int WaitStatesNeeded = 0;
  WaitStatesNeeded = checkSMEMSoftClauseHazards(SMRD);
  // This SMRD hazard only affects SI.
-  if (ST.getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS)
+  if (ST.getGeneration() != SISubtarget::SOUTHERN_ISLANDS)
    return WaitStatesNeeded;
  // A read of an SGPR by SMRD instruction requires 4 wait states when the
}
int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
- const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
+ const SIInstrInfo *TII = ST.getInstrInfo();
- if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
return 0;
const SIRegisterInfo &TRI = TII->getRegisterInfo();
}
int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo*>(ST.getRegisterInfo());
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
// Check for DPP VGPR read after VALU VGPR write.
int DppVgprWaitStates = 2;
class MachineInstr;
class ScheduleDAG;
class SIInstrInfo;
+class SISubtarget;
class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
-
- // This variable stores the instruction that has been emitted this cycle.
- // It will be added to EmittedInstrs, when AdvanceCycle() or RecedeCycle() is
+ // This variable stores the instruction that has been emitted this cycle. It
+ // will be added to EmittedInstrs, when AdvanceCycle() or RecedeCycle() is
// called.
MachineInstr *CurrCycleInstr;
std::list<MachineInstr*> EmittedInstrs;
const MachineFunction &MF;
+ const SISubtarget &ST;
int getWaitStatesSinceDef(unsigned Reg,
function_ref<bool(MachineInstr *)> IsHazardDef =
if (skipFunction(*MF.getFunction()))
return false;
- TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
+ TII = ST.getInstrInfo();
+
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
MachineBasicBlock &MBB = *BB;
FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
};
- const AMDGPUSubtarget *ST;
+ const R600Subtarget *ST;
std::vector<StackItem> BranchStack;
std::vector<StackItem> LoopStack;
unsigned MaxStackSize;
unsigned CurrentEntries;
unsigned CurrentSubEntries;
- CFStack(const AMDGPUSubtarget *st, CallingConv::ID cc) : ST(st),
+ CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
// We need to reserve a stack entry for CALL_FS in vertex shaders.
MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0),
CurrentEntries(0), CurrentSubEntries(0) { }
return 0;
case CFStack::FIRST_NON_WQM_PUSH:
assert(!ST->hasCaymanISA());
- if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
+ if (ST->getGeneration() <= R600Subtarget::R700) {
// +1 For the push operation.
// +2 Extra space required.
return 3;
return 2;
}
case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
- assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
+ assert(ST->getGeneration() >= R600Subtarget::EVERGREEN);
// +1 For the push operation.
// +1 Extra space required.
return 2;
// See comment in
// CFStack::getSubEntrySize()
else if (CurrentEntries > 0 &&
- ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
+ ST->getGeneration() > R600Subtarget::EVERGREEN &&
!ST->hasCaymanISA() &&
!branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
const R600InstrInfo *TII;
const R600RegisterInfo *TRI;
unsigned MaxFetchInst;
- const AMDGPUSubtarget *ST;
+ const R600Subtarget *ST;
bool IsTrivialInst(MachineInstr *MI) const {
switch (MI->getOpcode()) {
const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
unsigned Opcode = 0;
- bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
+ bool isEg = (ST->getGeneration() >= R600Subtarget::EVERGREEN);
switch (CFI) {
case CF_TC:
Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
: MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), ST(nullptr) {}
bool runOnMachineFunction(MachineFunction &MF) override {
- ST = &MF.getSubtarget<AMDGPUSubtarget>();
+ ST = &MF.getSubtarget<R600Subtarget>();
MaxFetchInst = ST->getTexVTXClauseSize();
- TII = static_cast<const R600InstrInfo *>(ST->getInstrInfo());
- TRI = static_cast<const R600RegisterInfo *>(ST->getRegisterInfo());
+ TII = ST->getInstrInfo();
+ TRI = ST->getRegisterInfo();
+
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
CFStack CFStack(ST, MF.getFunction()->getCallingConv());
}
bool runOnMachineFunction(MachineFunction &MF) override {
- TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
+ TII = ST.getInstrInfo();
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
BB != BB_E; ++BB) {
namespace {
class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
-
private:
static char ID;
const R600InstrInfo *TII;
}
bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
+ TII = ST.getInstrInfo();
const R600RegisterInfo &TRI = TII->getRegisterInfo();
--- /dev/null
+//===----------------------- R600FrameLowering.cpp ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+
+#include "R600FrameLowering.h"
+
+using namespace llvm;
+
+// Out-of-line destructor definition; presumably serves as the anchor that
+// ties emission of R600FrameLowering's vtable to this translation unit
+// (NOTE(review): assumed intent per usual LLVM practice — confirm).
+R600FrameLowering::~R600FrameLowering() {
+}
--- /dev/null
+//===--------------------- R600FrameLowering.h ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_R600FRAMELOWERING_H
+#define LLVM_LIB_TARGET_AMDGPU_R600FRAMELOWERING_H
+
+#include "AMDGPUFrameLowering.h"
+
+namespace llvm {
+
+/// Frame lowering for R600-generation targets. Both prologue/epilogue hooks
+/// are deliberately empty; the destructor is defined out-of-line in
+/// R600FrameLowering.cpp.
+class R600FrameLowering : public AMDGPUFrameLowering {
+public:
+  R600FrameLowering(StackDirection D, unsigned StackAl, int LAO,
+                    unsigned TransAl = 1) :
+    AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {}
+  ~R600FrameLowering() override;
+
+  // Mark the TargetFrameLowering hooks 'override' so any signature drift in
+  // the base class is caught at compile time, consistent with the rest of
+  // this change.
+  void emitPrologue(MachineFunction &MF,
+                    MachineBasicBlock &MBB) const override {}
+  void emitEpilogue(MachineFunction &MF,
+                    MachineBasicBlock &MBB) const override {}
+};
+
+} // End namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_R600FRAMELOWERING_H
using namespace llvm;
-R600TargetLowering::R600TargetLowering(TargetMachine &TM,
- const AMDGPUSubtarget &STI)
+R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
+ const R600Subtarget &STI)
: AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
}
+// The base class stores the subtarget as a plain AMDGPUSubtarget; an
+// R600TargetLowering is only ever constructed with an R600Subtarget (see the
+// constructor above), so the downcast is safe.
+const R600Subtarget *R600TargetLowering::getSubtarget() const {
+  return static_cast<const R600Subtarget *>(Subtarget);
+}
+
static inline bool isEOP(MachineBasicBlock::iterator I) {
return std::next(I)->getOpcode() == AMDGPU::RETURN;
}
MachineFunction * MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock::iterator I = *MI;
- const R600InstrInfo *TII =
- static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
+ const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
switch (MI->getOpcode()) {
default:
SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
DAG.getNode(ISD::FADD, DL, VT, FractPart,
DAG.getConstantFP(-0.5, DL, MVT::f32)));
- if (Gen >= AMDGPUSubtarget::R700)
+ if (Gen >= R600Subtarget::R700)
return TrigVal;
// On R600 hw, COS/SIN input must be between -Pi and Pi.
return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
// Lowering for indirect addressing
const MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL =
- static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
+ const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
unsigned StackWidth = TFL->getStackWidth(MF);
Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
// Lowering for indirect addressing
const MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL =
- static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
+ const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
unsigned StackWidth = TFL->getStackWidth(MF);
Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL = Subtarget->getFrameLowering();
+ const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
-static bool
-FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
- SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
- const R600InstrInfo *TII =
- static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
+bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
+ SDValue &Src, SDValue &Neg, SDValue &Abs,
+ SDValue &Sel, SDValue &Imm,
+ SelectionDAG &DAG) const {
+ const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
if (!Src.isMachineOpcode())
return false;
+
switch (Src.getMachineOpcode()) {
case AMDGPU::FNEG_R600:
if (!Neg.getNode())
}
}
-
/// \brief Fold the instructions after selecting them
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
SelectionDAG &DAG) const {
- const R600InstrInfo *TII =
- static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
+ const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
if (!Node->isMachineOpcode())
return Node;
+
unsigned Opcode = Node->getMachineOpcode();
SDValue FakeOp;
namespace llvm {
class R600InstrInfo;
+class R600Subtarget;
class R600TargetLowering final : public AMDGPUTargetLowering {
public:
- R600TargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI);
+ R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI);
+
+ const R600Subtarget *getSubtarget() const;
+
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock * BB) const override;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
bool isHWTrueValue(SDValue Op) const;
bool isHWFalseValue(SDValue Op) const;
+ bool FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src,
+ SDValue &Neg, SDValue &Abs, SDValue &Sel, SDValue &Imm,
+ SelectionDAG &DAG) const;
+
SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
};
#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenDFAPacketizer.inc"
-R600InstrInfo::R600InstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUInstrInfo(st), RI() {}
-
-const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
- return RI;
-}
+R600InstrInfo::R600InstrInfo(const R600Subtarget &ST)
+ : AMDGPUInstrInfo(ST), RI(), ST(ST) {}
bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
}
bool R600InstrInfo::isMov(unsigned Opcode) const {
-
-
switch(Opcode) {
- default: return false;
+ default:
+ return false;
case AMDGPU::MOV:
case AMDGPU::MOV_IMM_F32:
case AMDGPU::MOV_IMM_I32:
DFAPacketizer *
R600InstrInfo::CreateTargetScheduleState(const TargetSubtargetInfo &STI) const {
const InstrItineraryData *II = STI.getInstrItineraryData();
- return static_cast<const AMDGPUSubtarget &>(STI).createDFAPacketizer(II);
+ return static_cast<const R600Subtarget &>(STI).createDFAPacketizer(II);
}
static bool
void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
const MachineFunction &MF) const {
- const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering *>(
- MF.getSubtarget().getFrameLowering());
+ const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
+ const R600FrameLowering *TFL = ST.getFrameLowering();
unsigned StackWidth = TFL->getStackWidth(MF);
int End = getIndirectIndexEnd(MF);
const {
assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
unsigned Opcode;
- if (ST.getGeneration() <= AMDGPUSubtarget::R700)
+ if (ST.getGeneration() <= R600Subtarget::R700)
Opcode = AMDGPU::DOT4_r600;
else
Opcode = AMDGPU::DOT4_eg;
#define LLVM_LIB_TARGET_AMDGPU_R600INSTRINFO_H
#include "AMDGPUInstrInfo.h"
-#include "R600Defines.h"
#include "R600RegisterInfo.h"
namespace llvm {
- class AMDGPUTargetMachine;
- class DFAPacketizer;
- class MachineFunction;
- class MachineInstr;
- class MachineInstrBuilder;
-
- class R600InstrInfo final : public AMDGPUInstrInfo {
- private:
+class AMDGPUTargetMachine;
+class DFAPacketizer;
+class MachineFunction;
+class MachineInstr;
+class MachineInstrBuilder;
+class R600Subtarget;
+
+class R600InstrInfo final : public AMDGPUInstrInfo {
+private:
const R600RegisterInfo RI;
+ const R600Subtarget &ST;
- std::vector<std::pair<int, unsigned> >
- ExtractSrcs(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PV, unsigned &ConstCount) const;
-
+ std::vector<std::pair<int, unsigned>>
+ ExtractSrcs(MachineInstr *MI,
+ const DenseMap<unsigned, unsigned> &PV,
+ unsigned &ConstCount) const;
MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
unsigned AddrChan) const;
MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned ValueReg, unsigned Address,
- unsigned OffsetReg,
- unsigned AddrChan) const;
- public:
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg,
+ unsigned AddrChan) const;
+public:
enum BankSwizzle {
ALU_VEC_012_SCL_210 = 0,
ALU_VEC_021_SCL_122,
ALU_VEC_210
};
- explicit R600InstrInfo(const AMDGPUSubtarget &st);
+ explicit R600InstrInfo(const R600Subtarget &);
+
+ const R600RegisterInfo &getRegisterInfo() const {
+ return RI;
+ }
- const R600RegisterInfo &getRegisterInfo() const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
/// If register is ALU_LITERAL, second member is IMM.
/// Otherwise, second member value is undefined.
SmallVector<std::pair<MachineOperand *, int64_t>, 3>
- getSrcs(MachineInstr *MI) const;
+ getSrcs(MachineInstr *MI) const;
unsigned isLegalUpTo(
const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
DFAPacketizer *
CreateTargetScheduleState(const TargetSubtargetInfo &) const override;
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+ bool ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const override;
- bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const override;
+ bool AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const override;
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
bool isPredicable(MachineInstr &MI) const override;
- bool
- isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
- BranchProbability Probability) const override;
+ bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+ BranchProbability Probability) const override;
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
unsigned ExtraPredCycles,
BranchProbability Probability) const override ;
- bool
- isProfitableToIfCvt(MachineBasicBlock &TMBB,
- unsigned NumTCycles, unsigned ExtraTCycles,
- MachineBasicBlock &FMBB,
- unsigned NumFCycles, unsigned ExtraFCycles,
- BranchProbability Probability) const override;
+ bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles, unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles, unsigned ExtraFCycles,
+ BranchProbability Probability) const override;
bool DefinesPredicate(MachineInstr &MI,
std::vector<MachineOperand> &Pred) const override;
ArrayRef<MachineOperand> Pred2) const override;
bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
- MachineBasicBlock &FMBB) const override;
+ MachineBasicBlock &FMBB) const override;
bool PredicateInstruction(MachineInstr &MI,
ArrayRef<MachineOperand> Pred) const override;
unsigned getMaxAlusPerClause() const;
- ///buildDefaultInstruction - This function returns a MachineInstr with
- /// all the instruction modifiers initialized to their default values.
- /// You can use this function to avoid manually specifying each instruction
- /// modifier operand when building a new instruction.
+ /// buildDefaultInstruction - This function returns a MachineInstr with all
+ /// the instruction modifiers initialized to their default values. You can
+ /// use this function to avoid manually specifying each instruction modifier
+ /// operand when building a new instruction.
///
/// \returns a MachineInstr with all the instruction modifiers initialized
/// to their default values.
unsigned DstReg) const;
MachineInstr *buildMovImm(MachineBasicBlock &BB,
- MachineBasicBlock::iterator I,
- unsigned DstReg,
- uint64_t Imm) const;
+ MachineBasicBlock::iterator I,
+ unsigned DstReg,
+ uint64_t Imm) const;
MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
// Helper functions that check the opcode for status information
bool isRegisterStore(const MachineInstr &MI) const;
bool isRegisterLoad(const MachineInstr &MI) const;
-
};
namespace AMDGPU {
def load_param_exti8 : LoadParamFrag<az_extloadi8>;
def load_param_exti16 : LoadParamFrag<az_extloadi16>;
-def isR600 : Predicate<"Subtarget->getGeneration() <= AMDGPUSubtarget::R700">;
+def isR600 : Predicate<"Subtarget->getGeneration() <= R600Subtarget::R700">;
def isR600toCayman
: Predicate<
- "Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS">;
+ "Subtarget->getGeneration() <= R600Subtarget::NORTHERN_ISLANDS">;
//===----------------------------------------------------------------------===//
// R600 SDNodes
//===----------------------------------------------------------------------===//
#include "R600MachineScheduler.h"
+#include "R600InstrInfo.h"
#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Pass.h"
void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
assert(dag->hasVRegLiveness() && "R600SchedStrategy needs vreg liveness");
DAG = static_cast<ScheduleDAGMILive*>(dag);
- const AMDGPUSubtarget &ST = DAG->MF.getSubtarget<AMDGPUSubtarget>();
+ const R600Subtarget &ST = DAG->MF.getSubtarget<R600Subtarget>();
TII = static_cast<const R600InstrInfo*>(DAG->TII);
TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
VLIW5 = !ST.hasCaymanISA();
QSrc.clear();
}
-static
-unsigned getWFCountLimitedByGPR(unsigned GPRCount) {
+static unsigned getWFCountLimitedByGPR(unsigned GPRCount) {
assert (GPRCount && "GPRCount cannot be 0");
return 248 / GPRCount;
}
DEBUG(dbgs() << "New Slot\n");
assert (OccupedSlotsMask && "Slot wasn't filled");
OccupedSlotsMask = 0;
-// if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS)
+// if (HwGen == R600Subtarget::NORTHERN_ISLANDS)
// OccupedSlotsMask |= 16;
InstructionsGroupCandidate.clear();
LoadAlu();
#ifndef LLVM_LIB_TARGET_AMDGPU_R600MACHINESCHEDULER_H
#define LLVM_LIB_TARGET_AMDGPU_R600MACHINESCHEDULER_H
-#include "R600InstrInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
using namespace llvm;
namespace llvm {
-class R600SchedStrategy final : public MachineSchedStrategy {
+class R600InstrInfo;
+struct R600RegisterInfo;
+class R600SchedStrategy final : public MachineSchedStrategy {
const ScheduleDAGMILive *DAG;
const R600InstrInfo *TII;
const R600RegisterInfo *TRI;
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
+#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineDominators.h"
if (skipFunction(*Fn.getFunction()))
return false;
- TII = static_cast<const R600InstrInfo *>(Fn.getSubtarget().getInstrInfo());
- MRI = &(Fn.getRegInfo());
+ const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>();
+ TII = ST.getInstrInfo();
+ MRI = &Fn.getRegInfo();
+
for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
MBB != MBBe; ++MBB) {
MachineBasicBlock *MB = &*MBB;
char R600Packetizer::ID = 0;
class R600PacketizerList : public VLIWPacketizerList {
-
private:
const R600InstrInfo *TII;
const R600RegisterInfo &TRI;
}
public:
// Ctor.
- R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI)
+ R600PacketizerList(MachineFunction &MF, const R600Subtarget &ST,
+ MachineLoopInfo &MLI)
: VLIWPacketizerList(MF, MLI, nullptr),
- TII(static_cast<const R600InstrInfo *>(
- MF.getSubtarget().getInstrInfo())),
+ TII(ST.getInstrInfo()),
TRI(TII->getRegisterInfo()) {
- VLIW5 = !MF.getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
+ VLIW5 = !ST.hasCaymanISA();
}
// initPacketizerState - initialize some internal flags.
};
bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
- const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
+ const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>();
+ const R600InstrInfo *TII = ST.getInstrInfo();
+
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
// Instantiate the packetizer.
- R600PacketizerList Packetizer(Fn, MLI);
+ R600PacketizerList Packetizer(Fn, ST, MLI);
// DFA state table should not be empty.
assert(Packetizer.getResourceTracker() && "Empty DFA table!");
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const R600InstrInfo *TII =
- static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
+ const R600InstrInfo *TII = ST.getInstrInfo();
Reserved.set(AMDGPU::ZERO);
Reserved.set(AMDGPU::HALF);
/// \brief get the HW encoding for a register's channel.
unsigned getHWRegChan(unsigned reg) const;
- unsigned getHWRegIndex(unsigned Reg) const override;
+ unsigned getHWRegIndex(unsigned Reg) const;
/// \brief get the register class of the specified type to use in the
/// CFGStructurizer
//===----------------------------------------------------------------------===//
#include "SIInstrInfo.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
bool SIDebuggerInsertNops::runOnMachineFunction(MachineFunction &MF) {
// Skip this pass if "amdgpu-debugger-insert-nops" attribute was not
// specified.
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
if (!ST.debuggerInsertNops())
return false;
return false;
// Target instruction info.
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(MF.getSubtarget().getInstrInfo());
+ const SIInstrInfo *TII = ST.getInstrInfo();
// Set containing line numbers that have nop inserted.
DenseSet<unsigned> NopInserted;
namespace AMDGPU {
enum OperandType {
/// Operand with register or 32-bit immediate
- OPERAND_REG_IMM32 = llvm::MCOI::OPERAND_FIRST_TARGET,
+ OPERAND_REG_IMM32 = MCOI::OPERAND_FIRST_TARGET,
/// Operand with register or inline constant
OPERAND_REG_INLINE_C
};
}
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const SIInstrInfo *TII = ST.getInstrInfo();
SmallVector<MachineInstr *, 16> Worklist;
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
if (skipFunction(*MF.getFunction()))
return false;
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+
MachineRegisterInfo &MRI = MF.getRegInfo();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
+#include "AMDGPUSubtarget.h"
+
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
if (hasOnlySGPRSpills(MFI, MF.getFrameInfo()))
return;
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo *TRI = &TII->getRegisterInfo();
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
MachineBasicBlock::iterator I = MBB.begin();
}
}
+void SIFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+
+}
+
void SIFrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF,
RegScavenger *RS) const {
void emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const override;
+ void emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const override;
void processFunctionBeforeFrameFinalized(
MachineFunction &MF,
llvm_unreachable("Cannot allocate sgpr");
}
-SITargetLowering::SITargetLowering(TargetMachine &TM,
- const AMDGPUSubtarget &STI)
+SITargetLowering::SITargetLowering(const TargetMachine &TM,
+ const SISubtarget &STI)
: AMDGPUTargetLowering(TM, STI) {
addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Expand);
- if (Subtarget->hasFlatAddressSpace()) {
+ if (getSubtarget()->hasFlatAddressSpace()) {
setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
}
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
+ if (Subtarget->getGeneration() >= SISubtarget::SEA_ISLANDS) {
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
setSchedulingPreference(Sched::RegPressure);
}
+const SISubtarget *SITargetLowering::getSubtarget() const {
+ return static_cast<const SISubtarget *>(Subtarget);
+}
+
//===----------------------------------------------------------------------===//
// TargetLowering queries
//===----------------------------------------------------------------------===//
switch (AS) {
case AMDGPUAS::GLOBAL_ADDRESS: {
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
// Assume that we will use FLAT for all global memory accesses
// on VI.
// FIXME: This assumption is currently wrong. On VI we still use
if (DL.getTypeStoreSize(Ty) < 4)
return isLegalMUBUFAddressingMode(AM);
- if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS) {
// SMRD instructions have an 8-bit, dword offset on SI.
if (!isUInt<8>(AM.BaseOffs / 4))
return false;
- } else if (Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS) {
+ } else if (Subtarget->getGeneration() == SISubtarget::SEA_ISLANDS) {
// On CI+, this can also be a 32-bit literal constant offset. If it fits
// in 8-bits, it can use a smaller encoding.
if (!isUInt<32>(AM.BaseOffs / 4))
return false;
- } else if (Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ } else if (Subtarget->getGeneration() == SISubtarget::VOLCANIC_ISLANDS) {
// On VI, these use the SMEM format and the offset is 20-bit in bytes.
if (!isUInt<20>(AM.BaseOffs))
return false;
bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
return TII->isInlineConstant(Imm);
}
unsigned Offset) const {
const DataLayout &DL = DAG.getDataLayout();
MachineFunction &MF = DAG.getMachineFunction();
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo());
+ const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
+ const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
FunctionType *FType = MF.getFunction()->getFunctionType();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
if (Subtarget->isAmdHsaOS() && AMDGPU::isShader(CallConv)) {
const Function *Fn = MF.getFunction();
auto *ParamTy =
dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
- if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
+ if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS &&
ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
// On SI local pointers are just offsets into LDS, so they are always
// less than 16-bits. On CI and newer they could potentially be
}
- if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
+ if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS &&
Subtarget->getRegisterInfo()->regsOverlap(Reg, AMDGPU::FLAT_SCR)) {
report_fatal_error(Twine("invalid register \""
+ StringRef(RegName) + "\" for subtarget."));
MachineInstr *MI, MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
case AMDGPU::SI_INIT_M0: {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
BuildMI(*BB, MI->getIterator(), MI->getDebugLoc(),
TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
.addOperand(MI->getOperand(0));
case AMDGPU::BRANCH:
return BB;
case AMDGPU::GET_GROUPSTATICSIZE: {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
+
MachineFunction *MF = BB->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
DebugLoc DL = MI->getDebugLoc();
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
auto MFI = MF.getInfo<SIMachineFunctionInfo>();
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
+ const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
EVT VT = Op.getValueType();
SDLoc DL(Op);
case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_rsq_legacy: {
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
return emitRemovedIntrinsicError(DAG, DL, VT);
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
}
case Intrinsic::amdgcn_rsq_clamp:
case AMDGPUIntrinsic::AMDGPU_rsq_clamped: { // Legacy name
- if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
Type *Type = VT.getTypeForEVT(*DAG.getContext());
return DAG.getNode(AMDGPUISD::COS_HW, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_log_clamp: {
- if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
return SDValue();
DiagnosticInfoUnsupported BadIntrin(
SDValue Scale;
- if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS) {
// Workaround a hardware bug on SI where the condition output from div_scale
// is not usable.
/// \brief Return true if the given offset Size in bytes can be folded into
/// the immediate offsets of a memory instruction for the given address space.
static bool canFoldOffset(unsigned OffsetSize, unsigned AS,
- const AMDGPUSubtarget &STI) {
+ const SISubtarget &STI) {
switch (AS) {
case AMDGPUAS::GLOBAL_ADDRESS: {
// MUBUF instructions have a 12-bit offset in bytes.
case AMDGPUAS::CONSTANT_ADDRESS: {
// SMRD instructions have an 8-bit offset in dwords on SI and
// a 20-bit offset in bytes on VI.
- if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (STI.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
return isUInt<20>(OffsetSize);
else
return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
// If the resulting offset is too large, we can't fold it into the addressing
// mode offset.
APInt Offset = CAdd->getAPIntValue() << CN1->getAPIntValue();
- if (!canFoldOffset(Offset.getZExtValue(), AddrSpace, *Subtarget))
+ if (!canFoldOffset(Offset.getZExtValue(), AddrSpace, *getSubtarget()))
return SDValue();
SelectionDAG &DAG = DCI.DAG;
/// Returns -1 if it isn't an immediate, 0 if it's an inline immediate
/// and the immediate value if it's a literal immediate
int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
-
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N)) {
if (TII->isInlineConstant(Node->getAPIntValue()))
/// \brief Fold the instructions after selecting them.
SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
SelectionDAG &DAG) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
unsigned Opcode = Node->getMachineOpcode();
if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore())
/// bits set in the writemask
void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
const SDLoc &DL,
SDValue Ptr) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
// Build the half of the subregister with the constants before building the
// full 128-bit register. If we are building multiple resource descriptors,
bool isCFIntrinsic(const SDNode *Intr) const;
public:
- SITargetLowering(TargetMachine &tm, const AMDGPUSubtarget &STI);
+ SITargetLowering(const TargetMachine &tm, const SISubtarget &STI);
+
+ const SISubtarget *getSubtarget() const;
bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
unsigned IntrinsicID) const override;
class SIInsertWaits : public MachineFunctionPass {
private:
+ const SISubtarget *ST;
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
const MachineRegisterInfo *MRI;
SIInsertWaits() :
MachineFunctionPass(ID),
+ ST(nullptr),
TII(nullptr),
TRI(nullptr),
ExpInstrTypesSeen(0),
return;
}
- if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
- AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
// Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
// or SMEM clause, respectively.
//
void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
- if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <
- AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (ST->getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
return;
// There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
bool Changes = false;
- TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
- TRI =
- static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
-
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ ST = &MF.getSubtarget<SISubtarget>();
+ TII = ST->getInstrInfo();
+ TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
WaitedOn = ZeroCounts;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
- if (ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) {
+ if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS) {
// There is a hardware bug on CI/SI where SMRD instruction may corrupt
// vccz bit, so when we detect that an instruction may read from a
// corrupt vccz bit, we need to:
using namespace llvm;
-SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUInstrInfo(st), RI() {}
+SIInstrInfo::SIInstrInfo(const SISubtarget &ST)
+ : AMDGPUInstrInfo(ST), RI(), ST(ST) {}
//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
unsigned Size) const {
MachineFunction *MF = MBB.getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>();
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo*>(ST.getRegisterInfo());
+ const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
DebugLoc DL = MBB.findDebugLoc(MI);
unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF);
unsigned WavefrontSize = ST.getWavefrontSize();
}
case AMDGPU::SI_PC_ADD_REL_OFFSET: {
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo *>(ST.getRegisterInfo());
+ const SIRegisterInfo *TRI
+ = static_cast<const SIRegisterInfo *>(ST.getRegisterInfo());
MachineFunction &MF = *MBB.getParent();
unsigned Reg = MI->getOperand(0).getReg();
unsigned RegLo = TRI->getSubReg(Reg, AMDGPU::sub0);
// Target-independent instructions do not have an implicit-use of EXEC, even
// when they operate on VGPRs. Treating EXEC modifications as scheduling
// boundaries prevents incorrect movements of such instructions.
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
if (MI->modifiesRegister(AMDGPU::EXEC, TRI))
return true;
} else {
// This instruction is the _OFFSET variant, so we need to convert it to
// ADDR64.
- assert(MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration()
- < AMDGPUSubtarget::VOLCANIC_ISLANDS &&
+ assert(MBB.getParent()->getSubtarget<SISubtarget>().getGeneration()
+ < SISubtarget::VOLCANIC_ISLANDS &&
"FIXME: Need to emit flat atomics here");
MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
}
case AMDGPU::S_LSHL_B32:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
swapOperands(Inst);
}
break;
case AMDGPU::S_ASHR_I32:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
swapOperands(Inst);
}
break;
case AMDGPU::S_LSHR_B32:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
swapOperands(Inst);
}
break;
case AMDGPU::S_LSHL_B64:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
NewOpcode = AMDGPU::V_LSHLREV_B64;
swapOperands(Inst);
}
break;
case AMDGPU::S_ASHR_I64:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
NewOpcode = AMDGPU::V_ASHRREV_I64;
swapOperands(Inst);
}
break;
case AMDGPU::S_LSHR_B64:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
NewOpcode = AMDGPU::V_LSHRREV_B64;
swapOperands(Inst);
}
if (ST.isAmdHsaOS()) {
RsrcDataFormat |= (1ULL << 56);
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
// Set MTYPE = 2
RsrcDataFormat |= (2ULL << 59);
}
// If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
// Clear them unless we want a huge stride.
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
return Rsrc23;
class SIInstrInfo final : public AMDGPUInstrInfo {
private:
const SIRegisterInfo RI;
+ const SISubtarget &ST;
// The inverse predicate should have the negative value.
enum BranchPredicate {
unsigned OpIdx1) const override;
public:
- explicit SIInstrInfo(const AMDGPUSubtarget &st);
+ explicit SIInstrInfo(const SISubtarget &);
- const SIRegisterInfo &getRegisterInfo() const override {
+ const SIRegisterInfo &getRegisterInfo() const {
return RI;
}
//
//===----------------------------------------------------------------------===//
def isCI : Predicate<"Subtarget->getGeneration() "
- ">= AMDGPUSubtarget::SEA_ISLANDS">;
+ ">= SISubtarget::SEA_ISLANDS">;
def isCIOnly : Predicate<"Subtarget->getGeneration() =="
- "AMDGPUSubtarget::SEA_ISLANDS">,
+ "SISubtarget::SEA_ISLANDS">,
AssemblerPredicate <"FeatureSeaIslands">;
def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
field bits<8> VI = vi;
}
-// Execpt for the NONE field, this must be kept in sync with the SISubtarget enum
-// in AMDGPUInstrInfo.cpp
-def SISubtarget {
+// Except for the NONE field, this must be kept in sync with the
+// SIEncodingFamily enum in AMDGPUInstrInfo.cpp
+def SIEncodingFamily {
int NONE = -1;
int SI = 0;
int VI = 1;
}]>;
class SGPRImm <dag frag> : PatLeaf<frag, [{
- if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ if (Subtarget->getGeneration() < SISubtarget::SOUTHERN_ISLANDS) {
return false;
}
const SIRegisterInfo *SIRI =
multiclass EXP_m {
let isPseudo = 1, isCodeGenOnly = 1 in {
- def "" : EXPCommon, SIMCInstr <"exp", SISubtarget.NONE> ;
+ def "" : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.NONE> ;
}
- def _si : EXPCommon, SIMCInstr <"exp", SISubtarget.SI>, EXPe {
+ def _si : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.SI>, EXPe {
let DecoderNamespace="SICI";
let DisableDecoder = DisableSIDecoder;
}
- def _vi : EXPCommon, SIMCInstr <"exp", SISubtarget.VI>, EXPe_vi {
+ def _vi : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.VI>, EXPe_vi {
let DecoderNamespace="VI";
let DisableDecoder = DisableVIDecoder;
}
class SOP1_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
SOP1 <outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE> {
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm> :
SOP1 <outs, ins, asm, []>,
SOP1e <op.SI>,
- SIMCInstr<opName, SISubtarget.SI> {
+ SIMCInstr<opName, SIEncodingFamily.SI> {
let isCodeGenOnly = 0;
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
class SOP1_Real_vi <sop1 op, string opName, dag outs, dag ins, string asm> :
SOP1 <outs, ins, asm, []>,
SOP1e <op.VI>,
- SIMCInstr<opName, SISubtarget.VI> {
+ SIMCInstr<opName, SIEncodingFamily.VI> {
let isCodeGenOnly = 0;
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
class SOP2_Pseudo<string opName, dag outs, dag ins, list<dag> pattern> :
SOP2<outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE> {
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
let Size = 4;
class SOP2_Real_si<sop2 op, string opName, dag outs, dag ins, string asm> :
SOP2<outs, ins, asm, []>,
SOP2e<op.SI>,
- SIMCInstr<opName, SISubtarget.SI> {
+ SIMCInstr<opName, SIEncodingFamily.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
class SOP2_Real_vi<sop2 op, string opName, dag outs, dag ins, string asm> :
SOP2<outs, ins, asm, []>,
SOP2e<op.VI>,
- SIMCInstr<opName, SISubtarget.VI> {
+ SIMCInstr<opName, SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
class SOPK_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
SOPK <outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE> {
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
class SOPK_Real_si <sopk op, string opName, dag outs, dag ins, string asm> :
SOPK <outs, ins, asm, []>,
SOPKe <op.SI>,
- SIMCInstr<opName, SISubtarget.SI> {
+ SIMCInstr<opName, SIEncodingFamily.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
class SOPK_Real_vi <sopk op, string opName, dag outs, dag ins, string asm> :
SOPK <outs, ins, asm, []>,
SOPKe <op.VI>,
- SIMCInstr<opName, SISubtarget.VI> {
+ SIMCInstr<opName, SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
def _si : SOPK <outs, ins, asm, []>,
SOPK64e <op.SI>,
- SIMCInstr<opName, SISubtarget.SI> {
+ SIMCInstr<opName, SIEncodingFamily.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
def _vi : SOPK <outs, ins, asm, []>,
SOPK64e <op.VI>,
- SIMCInstr<opName, SISubtarget.VI> {
+ SIMCInstr<opName, SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
class SMRD_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
SMRD <outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE> {
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
string asm> :
SMRD <outs, ins, asm, []>,
SMRD_IMMe <op>,
- SIMCInstr<opName, SISubtarget.SI> {
+ SIMCInstr<opName, SIEncodingFamily.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
string asm> :
SMRD <outs, ins, asm, []>,
SMRD_SOFFe <op>,
- SIMCInstr<opName, SISubtarget.SI> {
+ SIMCInstr<opName, SIEncodingFamily.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
string asm, list<dag> pattern = []> :
SMRD <outs, ins, asm, pattern>,
SMEM_IMMe_vi <op>,
- SIMCInstr<opName, SISubtarget.VI> {
+ SIMCInstr<opName, SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
string asm, list<dag> pattern = []> :
SMRD <outs, ins, asm, pattern>,
SMEM_SOFFe_vi <op>,
- SIMCInstr<opName, SISubtarget.VI> {
+ SIMCInstr<opName, SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
class getAsmSDWA <bit HasDst, int NumSrcArgs, bit HasFloatModifiers,
ValueType DstVT = i32> {
- string dst = !if(HasDst,
+ string dst = !if(HasDst,
!if(!eq(DstVT.Size, 1),
"$sdst", // use $sdst for VOPC
"$vdst"),
string src0 = !if(HasFloatModifiers, "$src0_fmodifiers", "$src0_imodifiers");
string src1 = !if(HasFloatModifiers, "$src1_fmodifiers", "$src1_imodifiers");
string args = !if(!eq(NumSrcArgs, 0),
- "",
- !if(!eq(NumSrcArgs, 1),
+ "",
+ !if(!eq(NumSrcArgs, 1),
", "#src0#"$clamp",
", "#src0#", "#src1#"$clamp"
)
class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOP1Common <outs, ins, "", pattern>,
VOP <opName>,
- SIMCInstr <opName#"_e32", SISubtarget.NONE>,
+ SIMCInstr <opName#"_e32", SIEncodingFamily.NONE>,
MnemonicAlias<opName#"_e32", opName> {
let isPseudo = 1;
let isCodeGenOnly = 1;
class VOP1_Real_si <string opName, vop1 op, dag outs, dag ins, string asm> :
VOP1<op.SI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI> {
+ SIMCInstr <opName#"_e32", SIEncodingFamily.SI> {
let AssemblerPredicate = SIAssemblerPredicate;
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
class VOP1_Real_vi <string opName, vop1 op, dag outs, dag ins, string asm> :
VOP1<op.VI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI> {
+ SIMCInstr <opName#"_e32", SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOP2Common <outs, ins, "", pattern>,
VOP <opName>,
- SIMCInstr<opName#"_e32", SISubtarget.NONE>,
+ SIMCInstr<opName#"_e32", SIEncodingFamily.NONE>,
MnemonicAlias<opName#"_e32", opName> {
let isPseudo = 1;
let isCodeGenOnly = 1;
class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> :
VOP2 <op.SI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI> {
+ SIMCInstr <opName#"_e32", SIEncodingFamily.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> :
VOP2 <op.VI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI> {
+ SIMCInstr <opName#"_e32", SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
bit HasMods = 0, bit VOP3Only = 0> :
VOP3Common <outs, ins, "", pattern, HasMods, VOP3Only>,
VOP <opName>,
- SIMCInstr<opName#"_e64", SISubtarget.NONE>,
+ SIMCInstr<opName#"_e64", SIEncodingFamily.NONE>,
MnemonicAlias<opName#"_e64", opName> {
let isPseudo = 1;
let isCodeGenOnly = 1;
bit HasMods = 0, bit VOP3Only = 0> :
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
VOP3e <op>,
- SIMCInstr<opName#"_e64", SISubtarget.SI> {
+ SIMCInstr<opName#"_e64", SIEncodingFamily.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
bit HasMods = 0, bit VOP3Only = 0> :
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
VOP3e_vi <op>,
- SIMCInstr <opName#"_e64", SISubtarget.VI> {
+ SIMCInstr <opName#"_e64", SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
bit HasMods = 0, bit VOP3Only = 0> :
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
VOP3ce <op>,
- SIMCInstr<opName#"_e64", SISubtarget.SI> {
+ SIMCInstr<opName#"_e64", SIEncodingFamily.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
bit HasMods = 0, bit VOP3Only = 0> :
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
VOP3ce_vi <op>,
- SIMCInstr <opName#"_e64", SISubtarget.VI> {
+ SIMCInstr <opName#"_e64", SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
bit HasMods = 0, bit VOP3Only = 0> :
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
VOP3be <op>,
- SIMCInstr<opName#"_e64", SISubtarget.SI> {
+ SIMCInstr<opName#"_e64", SIEncodingFamily.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
bit HasMods = 0, bit VOP3Only = 0> :
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
VOP3be_vi <op>,
- SIMCInstr <opName#"_e64", SISubtarget.VI> {
+ SIMCInstr <opName#"_e64", SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
bit HasMods = 0, bit VOP3Only = 0> :
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
VOP3e <op>,
- SIMCInstr<opName#"_e64", SISubtarget.SI> {
+ SIMCInstr<opName#"_e64", SIEncodingFamily.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
bit HasMods = 0, bit VOP3Only = 0> :
VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
VOP3e_vi <op>,
- SIMCInstr <opName#"_e64", SISubtarget.VI> {
+ SIMCInstr <opName#"_e64", SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
string asm, list<dag> pattern = []> {
let isPseudo = 1, isCodeGenOnly = 1 in {
def "" : VOPAnyCommon <outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE>;
+ SIMCInstr<opName, SIEncodingFamily.NONE>;
}
def _si : VOP2 <op.SI3{5-0}, outs, ins, asm, []>,
- SIMCInstr <opName, SISubtarget.SI> {
+ SIMCInstr <opName, SIEncodingFamily.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
def _vi : VOP3Common <outs, ins, asm, []>,
VOP3e_vi <op.VI3>,
VOP3DisableFields <1, 0, 0>,
- SIMCInstr <opName, SISubtarget.VI> {
+ SIMCInstr <opName, SIEncodingFamily.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
let isCodeGenOnly = 0 in {
def _si : VOP2Common <P.Outs, P.Ins32,
!strconcat(opName, P.Asm32), []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>,
+ SIMCInstr <opName#"_e32", SIEncodingFamily.SI>,
VOP2_MADKe <op.SI> {
let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
def _vi : VOP2Common <P.Outs, P.Ins32,
!strconcat(opName, P.Asm32), []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI>,
+ SIMCInstr <opName#"_e32", SIEncodingFamily.VI>,
VOP2_MADKe <op.VI> {
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
class VOPC_Pseudo <dag ins, list<dag> pattern, string opName> :
VOPCCommon <ins, "", pattern>,
VOP <opName>,
- SIMCInstr<opName#"_e32", SISubtarget.NONE> {
+ SIMCInstr<opName#"_e32", SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
let AssemblerPredicates = [isSICI] in {
def _si : VOPC<op.SI, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI> {
+ SIMCInstr <opName#"_e32", SIEncodingFamily.SI> {
let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
let hasSideEffects = DefExec;
let SchedRW = sched;
let AssemblerPredicates = [isVI] in {
def _vi : VOPC<op.VI, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI> {
+ SIMCInstr <opName#"_e32", SIEncodingFamily.VI> {
let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
let hasSideEffects = DefExec;
let SchedRW = sched;
class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
VINTRPCommon <outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE> {
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
string asm> :
VINTRPCommon <outs, ins, asm, []>,
VINTRPe <op>,
- SIMCInstr<opName, SISubtarget.SI> {
+ SIMCInstr<opName, SIEncodingFamily.SI> {
let AssemblerPredicate = SIAssemblerPredicate;
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
string asm> :
VINTRPCommon <outs, ins, asm, []>,
VINTRPe_vi <op>,
- SIMCInstr<opName, SISubtarget.VI> {
+ SIMCInstr<opName, SIEncodingFamily.VI> {
let AssemblerPredicate = VIAssemblerPredicate;
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
class DS_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
DS <outs, ins, "", pattern>,
- SIMCInstr <opName, SISubtarget.NONE> {
+ SIMCInstr <opName, SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
class DS_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
DS <outs, ins, asm, []>,
DSe <op>,
- SIMCInstr <opName, SISubtarget.SI> {
+ SIMCInstr <opName, SIEncodingFamily.SI> {
let isCodeGenOnly = 0;
let AssemblerPredicates = [isSICI];
let DecoderNamespace="SICI";
class DS_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
DS <outs, ins, asm, []>,
DSe_vi <op>,
- SIMCInstr <opName, SISubtarget.VI> {
+ SIMCInstr <opName, SIEncodingFamily.VI> {
let isCodeGenOnly = 0;
let AssemblerPredicates = [isVI];
let DecoderNamespace="VI";
class MTBUF_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
MTBUF <outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE> {
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
string asm> :
MTBUF <outs, ins, asm, []>,
MTBUFe <op>,
- SIMCInstr<opName, SISubtarget.SI> {
+ SIMCInstr<opName, SIEncodingFamily.SI> {
let DecoderNamespace="SICI";
let DisableDecoder = DisableSIDecoder;
}
class MTBUF_Real_vi <bits<4> op, string opName, dag outs, dag ins, string asm> :
MTBUF <outs, ins, asm, []>,
MTBUFe_vi <op>,
- SIMCInstr <opName, SISubtarget.VI> {
+ SIMCInstr <opName, SIEncodingFamily.VI> {
let DecoderNamespace="VI";
let DisableDecoder = DisableVIDecoder;
}
class MUBUF_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
MUBUF <outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE> {
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
string asm> :
MUBUF <outs, ins, asm, []>,
MUBUFe <op.SI>,
- SIMCInstr<opName, SISubtarget.SI> {
+ SIMCInstr<opName, SIEncodingFamily.SI> {
let lds = 0;
let AssemblerPredicate = SIAssemblerPredicate;
let DecoderNamespace="SICI";
string asm> :
MUBUF <outs, ins, asm, []>,
MUBUFe_vi <op.VI>,
- SIMCInstr<opName, SISubtarget.VI> {
+ SIMCInstr<opName, SIEncodingFamily.VI> {
let lds = 0;
let AssemblerPredicate = VIAssemblerPredicate;
let DecoderNamespace="VI";
class FLAT_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
FLAT <0, outs, ins, "", pattern>,
- SIMCInstr<opName, SISubtarget.NONE> {
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
class FLAT_Real_ci <bits<7> op, string opName, dag outs, dag ins, string asm> :
FLAT <op, outs, ins, asm, []>,
- SIMCInstr<opName, SISubtarget.SI> {
+ SIMCInstr<opName, SIEncodingFamily.SI> {
let AssemblerPredicate = isCIOnly;
let DecoderNamespace="CI";
}
class FLAT_Real_vi <bits<7> op, string opName, dag outs, dag ins, string asm> :
FLAT <op, outs, ins, asm, []>,
- SIMCInstr<opName, SISubtarget.VI> {
+ SIMCInstr<opName, SIEncodingFamily.VI> {
let AssemblerPredicate = VIAssemblerPredicate;
let DecoderNamespace="VI";
let DisableDecoder = DisableVIDecoder;
class MIMG_Atomic_Real_si<mimg op, string name, string asm,
RegisterClass data_rc, RegisterClass addr_rc> :
MIMG_Atomic_Helper<asm, data_rc, addr_rc>,
- SIMCInstr<name, SISubtarget.SI>,
+ SIMCInstr<name, SIEncodingFamily.SI>,
MIMGe<op.SI> {
let isCodeGenOnly = 0;
let AssemblerPredicates = [isSICI];
class MIMG_Atomic_Real_vi<mimg op, string name, string asm,
RegisterClass data_rc, RegisterClass addr_rc> :
MIMG_Atomic_Helper<asm, data_rc, addr_rc>,
- SIMCInstr<name, SISubtarget.VI>,
+ SIMCInstr<name, SIEncodingFamily.VI>,
MIMGe<op.VI> {
let isCodeGenOnly = 0;
let AssemblerPredicates = [isVI];
RegisterClass data_rc, RegisterClass addr_rc> {
let isPseudo = 1, isCodeGenOnly = 1 in {
def "" : MIMG_Atomic_Helper<asm, data_rc, addr_rc>,
- SIMCInstr<name, SISubtarget.NONE>;
+ SIMCInstr<name, SIEncodingFamily.NONE>;
}
let ssamp = 0 in {
let FilterClass = "SIMCInstr";
let RowFields = ["PseudoInstr"];
let ColFields = ["Subtarget"];
- let KeyCol = [!cast<string>(SISubtarget.NONE)];
- let ValueCols = [[!cast<string>(SISubtarget.SI)],[!cast<string>(SISubtarget.VI)]];
+ let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
+ let ValueCols = [[!cast<string>(SIEncodingFamily.SI)],
+ [!cast<string>(SIEncodingFamily.VI)]];
}
def getAddr64Inst : InstrMapping {
def INTERP : InterpSlots;
def isGCN : Predicate<"Subtarget->getGeneration() "
- ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">,
+ ">= SISubtarget::SOUTHERN_ISLANDS">,
AssemblerPredicate<"FeatureGCN">;
def isSI : Predicate<"Subtarget->getGeneration() "
- "== AMDGPUSubtarget::SOUTHERN_ISLANDS">,
+ "== SISubtarget::SOUTHERN_ISLANDS">,
AssemblerPredicate<"FeatureSouthernIslands">;
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
MachineRegisterInfo *MRI;
LiveIntervals *LIS;
-
static bool offsetsCanBeCombined(unsigned Offset0,
unsigned Offset1,
unsigned EltSize);
if (skipFunction(*MF.getFunction()))
return false;
- const TargetSubtargetInfo &STM = MF.getSubtarget();
- TRI = static_cast<const SIRegisterInfo *>(STM.getRegisterInfo());
- TII = static_cast<const SIInstrInfo *>(STM.getInstrInfo());
+ const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+ TII = STM.getInstrInfo();
+ TRI = &TII->getRegisterInfo();
+
MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();
}
bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
- TRI =
- static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ TII = ST.getInstrInfo();
+ TRI = &TII->getRegisterInfo();
+
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
bool HaveKill = false;
bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
+
std::vector<unsigned> I1Defs;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
WorkItemIDX(false),
WorkItemIDY(false),
WorkItemIDZ(false) {
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
const Function *F = MF.getFunction();
PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
// We don't need to worry about accessing spills with flat instructions.
// TODO: On VI where we must use flat for global, we should be able to omit
// this if it is never used for generic access.
- if (HasStackObjects && ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS &&
+ if (HasStackObjects && ST.getGeneration() >= SISubtarget::SEA_ISLANDS &&
ST.isAmdHsaOS())
FlatScratchInit = true;
return FlatScratchInitUserSGPR;
}
-SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
+SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
MachineFunction *MF,
unsigned FrameIndex,
unsigned SubIdx) {
if (!EnableSpillSGPRToVGPR)
return SpilledReg();
+ const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
MachineFrameInfo *FrameInfo = MF->getFrameInfo();
- const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
- MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
MachineRegisterInfo &MRI = MF->getRegInfo();
int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
Offset += SubIdx * 4;
//
//===----------------------------------------------------------------------===//
+#include "AMDGPU.h"
#include "SIMachineScheduler.h"
-#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "SIRegisterInfo.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
using namespace llvm;
static unsigned getMaxWaveCountPerSIMD(const MachineFunction &MF) {
- const SIMachineFunctionInfo& MFI = *MF.getInfo<SIMachineFunctionInfo>();
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
unsigned SIMDPerCU = 4;
unsigned MaxInvocationsPerWave = SIMDPerCU * ST.getWavefrontSize();
}
static unsigned getMaxWorkGroupSGPRCount(const MachineFunction &MF) {
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
unsigned MaxWaveCountPerSIMD = getMaxWaveCountPerSIMD(MF);
unsigned TotalSGPRCountPerSIMD, AddressableSGPRCount, SGPRUsageAlignment;
unsigned ReservedSGPRCount;
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
TotalSGPRCountPerSIMD = 800;
AddressableSGPRCount = 102;
SGPRUsageAlignment = 16;
MaxSGPRCount = alignDown(MaxSGPRCount, SGPRUsageAlignment);
if (ST.hasSGPRInitBug())
- MaxSGPRCount = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+ MaxSGPRCount = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
return std::min(MaxSGPRCount - ReservedSGPRCount, AddressableSGPRCount);
}
// Reserve registers for debugger usage if "amdgpu-debugger-reserve-trap-regs"
// attribute was specified.
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
if (ST.debuggerReserveRegs()) {
unsigned ReservedVGPRFirst =
MaxWorkGroupVGPRCount - MFI->getDebuggerReservedVGPRCount();
unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
unsigned Idx) const {
- const AMDGPUSubtarget &STI = MF.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget &STI = MF.getSubtarget<SISubtarget>();
// FIXME: We should adjust the max number of waves based on LDS size.
- unsigned SGPRLimit = getNumSGPRsAllowed(STI.getGeneration(),
- STI.getMaxWavesPerCU());
+ unsigned SGPRLimit = getNumSGPRsAllowed(STI, STI.getMaxWavesPerCU());
unsigned VGPRLimit = getNumVGPRsAllowed(STI.getMaxWavesPerCU());
unsigned VSLimit = SGPRLimit + VGPRLimit;
DL = Ins->getDebugLoc();
MachineFunction *MF = MBB->getParent();
- const AMDGPUSubtarget &Subtarget = MF->getSubtarget<AMDGPUSubtarget>();
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = Subtarget.getInstrInfo();
if (Offset == 0) {
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
MachineBasicBlock *MBB = MI.getParent();
MachineFunction *MF = MBB->getParent();
- const AMDGPUSubtarget &Subtarget = MF->getSubtarget<AMDGPUSubtarget>();
- const SIInstrInfo *TII
- = static_cast<const SIInstrInfo *>(Subtarget.getInstrInfo());
+ const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = Subtarget.getInstrInfo();
#ifndef NDEBUG
// FIXME: Is it possible to be storing a frame index to itself?
bool IsKill = SrcDst->isKill();
MachineBasicBlock *MBB = MI->getParent();
MachineFunction *MF = MI->getParent()->getParent();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
+ const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+
DebugLoc DL = MI->getDebugLoc();
- bool IsStore = TII->get(LoadStoreOp).mayStore();
+ bool IsStore = MI->mayStore();
bool RanOutOfSGPRs = false;
bool Scavenged = false;
MachineBasicBlock *MBB = MI->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo *FrameInfo = MF->getFrameInfo();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
+ const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
MachineOperand &FIOp = MI->getOperand(FIOperandNum);
}
}
-unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const {
- return getEncodingValue(Reg) & 0xff;
-}
-
// FIXME: This is very slow. It might be worth creating a map from physreg to
// register class.
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
enum PreloadedValue Value) const {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
(void)ST;
switch (Value) {
case SIRegisterInfo::WORKGROUP_ID_X:
}
}
-unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
+unsigned SIRegisterInfo::getNumSGPRsAllowed(const SISubtarget &ST,
unsigned WaveCount) const {
- if (gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
switch (WaveCount) {
case 10: return 80;
case 9: return 80;
//
//===----------------------------------------------------------------------===//
-
#ifndef LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
#include "AMDGPURegisterInfo.h"
-#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Debug.h"
namespace llvm {
+class SISubtarget;
+class MachineRegisterInfo;
+
struct SIRegisterInfo final : public AMDGPURegisterInfo {
private:
unsigned SGPR32SetID;
unsigned FIOperandNum,
RegScavenger *RS) const override;
- unsigned getHWRegIndex(unsigned Reg) const override;
+ unsigned getHWRegIndex(unsigned Reg) const {
+ return getEncodingValue(Reg) & 0xff;
+ }
/// \brief Return the 'base' register class for this register.
/// e.g. SGPR0 => SReg_32, VGPR => VGPR_32 SGPR0_SGPR1 -> SReg_32, etc.
/// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
/// concurrent waves.
- unsigned getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
- unsigned WaveCount) const;
+ unsigned getNumSGPRsAllowed(const SISubtarget &ST, unsigned WaveCount) const;
unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC) const;
return false;
MachineRegisterInfo &MRI = MF.getRegInfo();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
+
std::vector<unsigned> I1Defs;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
ExecExports.clear();
LiveMaskQueries.clear();
- TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
- TRI = static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+
+ TII = ST.getInstrInfo();
+ TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
char GlobalFlags = analyzeFunction(MF);