class Target;
class TargetMachine;
class PassRegistry;
+class Module;
// R600 Passes
FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
/// however on the GPU, each address space points to
/// a separate piece of memory that is distinct from
/// other memory locations.
-namespace AMDGPUAS {
-enum AddressSpaces : unsigned {
- PRIVATE_ADDRESS = 0, ///< Address space for private memory.
- GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
- CONSTANT_ADDRESS = 2, ///< Address space for constant memory (VTX2)
- LOCAL_ADDRESS = 3, ///< Address space for local memory.
- FLAT_ADDRESS = 4, ///< Address space for flat memory.
- REGION_ADDRESS = 5, ///< Address space for region memory.
- PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0)
- PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1)
+struct AMDGPUAS {
+ // The following address space values depend on the triple environment.
+ unsigned PRIVATE_ADDRESS; ///< Address space for private memory.
+ unsigned CONSTANT_ADDRESS; ///< Address space for constant memory (VTX2)
+ unsigned FLAT_ADDRESS; ///< Address space for flat memory.
+ unsigned REGION_ADDRESS; ///< Address space for region memory.
+
+  // The maximum value among the flat, global, local, private, constant and
+  // region address spaces.
+ const static unsigned MAX_COMMON_ADDRESS = 5;
+
+ const static unsigned GLOBAL_ADDRESS = 1; ///< Address space for global memory (RAT0, VTX0).
+ const static unsigned LOCAL_ADDRESS = 3; ///< Address space for local memory.
+  const static unsigned PARAM_D_ADDRESS = 6; ///< Address space for direct addressable parameter memory (CONST0)
+  const static unsigned PARAM_I_ADDRESS = 7; ///< Address space for indirect addressable parameter memory (VTX1)
// Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on this
// order to be able to dynamically index a constant buffer, for example:
//
// ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx
- CONSTANT_BUFFER_0 = 8,
- CONSTANT_BUFFER_1 = 9,
- CONSTANT_BUFFER_2 = 10,
- CONSTANT_BUFFER_3 = 11,
- CONSTANT_BUFFER_4 = 12,
- CONSTANT_BUFFER_5 = 13,
- CONSTANT_BUFFER_6 = 14,
- CONSTANT_BUFFER_7 = 15,
- CONSTANT_BUFFER_8 = 16,
- CONSTANT_BUFFER_9 = 17,
- CONSTANT_BUFFER_10 = 18,
- CONSTANT_BUFFER_11 = 19,
- CONSTANT_BUFFER_12 = 20,
- CONSTANT_BUFFER_13 = 21,
- CONSTANT_BUFFER_14 = 22,
- CONSTANT_BUFFER_15 = 23,
+ const static unsigned CONSTANT_BUFFER_0 = 8;
+ const static unsigned CONSTANT_BUFFER_1 = 9;
+ const static unsigned CONSTANT_BUFFER_2 = 10;
+ const static unsigned CONSTANT_BUFFER_3 = 11;
+ const static unsigned CONSTANT_BUFFER_4 = 12;
+ const static unsigned CONSTANT_BUFFER_5 = 13;
+ const static unsigned CONSTANT_BUFFER_6 = 14;
+ const static unsigned CONSTANT_BUFFER_7 = 15;
+ const static unsigned CONSTANT_BUFFER_8 = 16;
+ const static unsigned CONSTANT_BUFFER_9 = 17;
+ const static unsigned CONSTANT_BUFFER_10 = 18;
+ const static unsigned CONSTANT_BUFFER_11 = 19;
+ const static unsigned CONSTANT_BUFFER_12 = 20;
+ const static unsigned CONSTANT_BUFFER_13 = 21;
+ const static unsigned CONSTANT_BUFFER_14 = 22;
+ const static unsigned CONSTANT_BUFFER_15 = 23;
// Some places use this if the address space can't be determined.
- UNKNOWN_ADDRESS_SPACE = ~0u
+ const static unsigned UNKNOWN_ADDRESS_SPACE = ~0u;
};
-} // namespace AMDGPUAS
+namespace llvm {
+namespace AMDGPU {
+AMDGPUAS getAMDGPUAS(const Module &M);
+AMDGPUAS getAMDGPUAS(const TargetMachine &TM);
+AMDGPUAS getAMDGPUAS(Triple T);
+} // namespace AMDGPU
+} // namespace llvm
#endif
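For reference, a minimal sketch of how the getAMDGPUAS overloads could derive
the triple-dependent values. The real definition is not shown in this patch,
and the "amdgiz"/"amdgizcl" environment names are assumptions; the two layouts
themselves follow the asserts in the alias-analysis constructor below.

  AMDGPUAS llvm::AMDGPU::getAMDGPUAS(Triple T) {
    AMDGPUAS AS;
    if (T.getEnvironmentName() == "amdgiz" ||
        T.getEnvironmentName() == "amdgizcl") {
      // Generic(flat)-is-zero layout.
      AS.FLAT_ADDRESS = 0;
      AS.REGION_ADDRESS = 2;
      AS.CONSTANT_ADDRESS = 4;
      AS.PRIVATE_ADDRESS = 5;
    } else {
      // Private-is-zero (legacy) layout.
      AS.PRIVATE_ADDRESS = 0;
      AS.CONSTANT_ADDRESS = 2;
      AS.FLAT_ADDRESS = 4;
      AS.REGION_ADDRESS = 5;
    }
    return AS;
  }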
AU.setPreservesAll();
}
+// Must match the table in getAliasResult.
+AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_) : AS(AS_) {
+  // These arrays are indexed by address space values 0 through 5
+  // (i.e. up to MAX_COMMON_ADDRESS).
+ static const AliasResult ASAliasRulesPrivIsZero[6][6] = {
+ /* Private Global Constant Group Flat Region*/
+ /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
+ /* Global */ {NoAlias , MayAlias, NoAlias , NoAlias , MayAlias, NoAlias},
+ /* Constant */ {NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, NoAlias},
+ /* Group */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias},
+ /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
+ /* Region */ {NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, MayAlias}
+ };
+ static const AliasResult ASAliasRulesGenIsZero[6][6] = {
+ /* Flat Global Region Group Constant Private */
+ /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
+ /* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , NoAlias , NoAlias},
+  /* Region   */ {MayAlias, NoAlias , MayAlias, NoAlias , NoAlias , NoAlias},
+ /* Group */ {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias},
+ /* Constant */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
+ /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias}
+ };
+ assert(AS.MAX_COMMON_ADDRESS <= 5);
+ if (AS.FLAT_ADDRESS == 0) {
+ assert(AS.GLOBAL_ADDRESS == 1 &&
+ AS.REGION_ADDRESS == 2 &&
+ AS.LOCAL_ADDRESS == 3 &&
+ AS.CONSTANT_ADDRESS == 4 &&
+ AS.PRIVATE_ADDRESS == 5);
+ ASAliasRules = &ASAliasRulesGenIsZero;
+ } else {
+ assert(AS.PRIVATE_ADDRESS == 0 &&
+ AS.GLOBAL_ADDRESS == 1 &&
+ AS.CONSTANT_ADDRESS == 2 &&
+ AS.LOCAL_ADDRESS == 3 &&
+ AS.FLAT_ADDRESS == 4 &&
+ AS.REGION_ADDRESS == 5);
+ ASAliasRules = &ASAliasRulesPrivIsZero;
+ }
+}
+
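Since getAliasResult below is called with (AS1, AS2) in either order, both
tables above must be symmetric. A debug-only invariant check, sketched here
(not part of the patch), could sit at the end of this constructor:

  #ifndef NDEBUG
    // Sketch: verify the selected table is symmetric.
    for (unsigned I = 0; I <= AMDGPUAS::MAX_COMMON_ADDRESS; ++I)
      for (unsigned J = 0; J <= AMDGPUAS::MAX_COMMON_ADDRESS; ++J)
        assert((*ASAliasRules)[I][J] == (*ASAliasRules)[J][I] &&
               "Alias rules must be symmetric");
  #endif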
+AliasResult AMDGPUAAResult::ASAliasRulesTy::getAliasResult(unsigned AS1,
+ unsigned AS2) const {
+ if (AS1 > AS.MAX_COMMON_ADDRESS || AS2 > AS.MAX_COMMON_ADDRESS)
+ report_fatal_error("Pointer address space out of range");
+ return (*ASAliasRules)[AS1][AS2];
+}
+
AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
const MemoryLocation &LocB) {
- // This array is indexed by the AMDGPUAS::AddressSpaces
- // enum elements PRIVATE_ADDRESS ... to FLAT_ADDRESS
- // see "llvm/Transforms/AMDSPIRUtils.h"
- static const AliasResult ASAliasRules[5][5] = {
- /* Private Global Constant Group Flat */
- /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias},
- /* Global */ {NoAlias , MayAlias, NoAlias , NoAlias , MayAlias},
- /* Constant */ {NoAlias , NoAlias , MayAlias, NoAlias , MayAlias},
- /* Group */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias},
- /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias}
- };
unsigned asA = LocA.Ptr->getType()->getPointerAddressSpace();
unsigned asB = LocB.Ptr->getType()->getPointerAddressSpace();
- if (asA > AMDGPUAS::AddressSpaces::FLAT_ADDRESS ||
- asB > AMDGPUAS::AddressSpaces::FLAT_ADDRESS)
- report_fatal_error("Pointer address space out of range");
- AliasResult Result = ASAliasRules[asA][asB];
+ AliasResult Result = ASAliasRules.getAliasResult(asA, asB);
if (Result == NoAlias) return Result;
if (isa<Argument>(LocA.Ptr) && isa<Argument>(LocB.Ptr)) {
bool OrLocal) {
const Value *Base = GetUnderlyingObject(Loc.Ptr, DL);
- if (Base->getType()->getPointerAddressSpace() ==
- AMDGPUAS::AddressSpaces::CONSTANT_ADDRESS) {
+ if (Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS) {
return true;
}
#ifndef LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
#define LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
+#include "AMDGPU.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
friend AAResultBase<AMDGPUAAResult>;
const DataLayout &DL;
+ AMDGPUAS AS;
public:
- explicit AMDGPUAAResult(const DataLayout &DL) : AAResultBase(), DL(DL) {}
+ explicit AMDGPUAAResult(const DataLayout &DL, Triple T) : AAResultBase(),
+ DL(DL), AS(AMDGPU::getAMDGPUAS(T)), ASAliasRules(AS) {}
AMDGPUAAResult(AMDGPUAAResult &&Arg)
- : AAResultBase(std::move(Arg)), DL(Arg.DL){}
+ : AAResultBase(std::move(Arg)), DL(Arg.DL), AS(Arg.AS),
+ ASAliasRules(Arg.ASAliasRules){}
/// Handle invalidation events from the new pass manager.
///
private:
bool Aliases(const MDNode *A, const MDNode *B) const;
bool PathAliases(const MDNode *A, const MDNode *B) const;
+
+ class ASAliasRulesTy {
+ public:
+ ASAliasRulesTy(AMDGPUAS AS_);
+ AliasResult getAliasResult(unsigned AS1, unsigned AS2) const;
+ private:
+ AMDGPUAS AS;
+ const AliasResult (*ASAliasRules)[6][6];
+ } ASAliasRules;
};
/// Analysis pass providing a never-invalidated alias analysis result.
typedef AMDGPUAAResult Result;
AMDGPUAAResult run(Function &F, AnalysisManager<Function> &AM) {
- return AMDGPUAAResult(F.getParent()->getDataLayout());
+ return AMDGPUAAResult(F.getParent()->getDataLayout(),
+ Triple(F.getParent()->getTargetTriple()));
}
};
const AMDGPUAAResult &getResult() const { return *Result; }
bool doInitialization(Module &M) override {
- Result.reset(new AMDGPUAAResult(M.getDataLayout()));
+ Result.reset(new AMDGPUAAResult(M.getDataLayout(),
+ Triple(M.getTargetTriple())));
return false;
}
bool doFinalization(Module &M) override {
class AMDGPUAnnotateKernelFeatures : public ModulePass {
private:
const TargetMachine *TM;
- static bool hasAddrSpaceCast(const Function &F);
+ AMDGPUAS AS;
+ static bool hasAddrSpaceCast(const Function &F, AMDGPUAS AS);
void addAttrToCallers(Function *Intrin, StringRef AttrName);
bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);
ModulePass::getAnalysisUsage(AU);
}
- static bool visitConstantExpr(const ConstantExpr *CE);
+ static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
static bool visitConstantExprsRecursively(
const Constant *EntryC,
- SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
+ SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
+ AMDGPUAS AS);
};
}
// The queue ptr is only needed when casting to flat, not from it.
-static bool castRequiresQueuePtr(unsigned SrcAS) {
- return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
+static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
+ return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
}
-static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
- return castRequiresQueuePtr(ASC->getSrcAddressSpace());
+static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
+ const AMDGPUAS &AS) {
+ return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
}
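A quick illustration of the rule (a sketch; the values follow the
address-space mapping above):

  // Casting group (local) or private pointers to flat needs the apertures,
  // which are read from the queue pointer when aperture registers are absent:
  //   castRequiresQueuePtr(AS.LOCAL_ADDRESS, AS)   -> true
  //   castRequiresQueuePtr(AS.PRIVATE_ADDRESS, AS) -> true
  //   castRequiresQueuePtr(AS.GLOBAL_ADDRESS, AS)  -> false
  //     (global <-> flat is a no-op cast)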
-bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
+bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
+ AMDGPUAS AS) {
if (CE->getOpcode() == Instruction::AddrSpaceCast) {
unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
- return castRequiresQueuePtr(SrcAS);
+ return castRequiresQueuePtr(SrcAS, AS);
}
return false;
bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
const Constant *EntryC,
- SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
+ SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
+ AMDGPUAS AS) {
if (!ConstantExprVisited.insert(EntryC).second)
return false;
// Check this constant expression.
if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
- if (visitConstantExpr(CE))
+ if (visitConstantExpr(CE, AS))
return true;
}
}
// Return true if an addrspacecast is used that requires the queue ptr.
-bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) {
+bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F,
+ AMDGPUAS AS) {
SmallPtrSet<const Constant *, 8> ConstantExprVisited;
for (const BasicBlock &BB : F) {
for (const Instruction &I : BB) {
if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
- if (castRequiresQueuePtr(ASC))
+ if (castRequiresQueuePtr(ASC, AS))
return true;
}
if (!OpC)
continue;
- if (visitConstantExprsRecursively(OpC, ConstantExprVisited))
+ if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS))
return true;
}
}
bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
Triple TT(M.getTargetTriple());
+ AS = AMDGPU::getAMDGPUAS(M);
static const StringRef IntrinsicToAttr[][2] = {
// .x omitted
bool HasApertureRegs =
TM && TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs();
- if (!HasApertureRegs && hasAddrSpaceCast(F))
+ if (!HasApertureRegs && hasAddrSpaceCast(F, AS))
F.addFnAttr("amdgpu-queue-ptr");
}
}
LoopInfo *LI;
DenseMap<Value*, GetElementPtrInst*> noClobberClones;
bool isKernelFunc;
+ AMDGPUAS AMDGPUASI;
public:
static char ID;
Value *Ptr = I.getPointerOperand();
if (!DA->isUniform(Ptr))
return;
- auto isGlobalLoad = [](LoadInst &Load)->bool {
- return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+ auto isGlobalLoad = [&](LoadInst &Load)->bool {
+ return Load.getPointerAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
};
// We're tracking up to the Function boundaries
// We cannot go beyond because of FunctionPass restrictions
}
bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
+ AMDGPUASI = AMDGPU::getAMDGPUAS(M);
return false;
}
//
#include "AMDGPUAsmPrinter.h"
+#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "InstPrinter/AMDGPUInstPrinter.h"
#include "Utils/AMDGPUBaseInfo.h"
AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)) {}
+ : AsmPrinter(TM, std::move(Streamer)) {
+ AMDGPUASI = static_cast<AMDGPUTargetMachine*>(&TM)->getAMDGPUAS();
+ }
StringRef AMDGPUAsmPrinter::getPassName() const {
return "AMDGPU Assembly Printer";
void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// Group segment variables aren't emitted in HSA.
- if (AMDGPU::isGroupSegment(GV))
+ if (AMDGPU::isGroupSegment(GV, AMDGPUASI))
return;
AsmPrinter::EmitGlobalVariable(GV);
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
#include "AMDKernelCodeT.h"
+#include "AMDGPU.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include <cstddef>
protected:
std::vector<std::string> DisasmLines, HexLines;
size_t DisasmLineMaxLen;
+ AMDGPUAS AMDGPUASI;
};
} // end namespace llvm
#endif
AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
- : CallLowering(&TLI) {
+ : CallLowering(&TLI), AMDGPUASI(TLI.getAMDGPUAS()) {
}
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI = MF.getRegInfo();
const Function &F = *MF.getFunction();
const DataLayout &DL = F.getParent()->getDataLayout();
- PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
+ PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
LLT PtrType = getLLTForType(*PtrTy, DL);
unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
unsigned KernArgSegmentPtr =
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = *MF.getFunction();
const DataLayout &DL = F.getParent()->getDataLayout();
- PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
+ PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
unsigned Align = DL.getABITypeAlignment(ParamTy);
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUCALLLOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUCALLLOWERING_H
+#include "AMDGPU.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
namespace llvm {
class AMDGPUTargetLowering;
class AMDGPUCallLowering: public CallLowering {
+ AMDGPUAS AMDGPUASI;
unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
unsigned Offset) const;
// Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
// make the right decision when generating code for different targets.
const AMDGPUSubtarget *Subtarget;
+ AMDGPUAS AMDGPUASI;
public:
explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(TM, OptLevel) {}
+    : SelectionDAGISel(TM, OptLevel) {
+ AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
+ }
~AMDGPUDAGToDAGISel() override = default;
bool runOnMachineFunction(MachineFunction &MF) override;
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
- cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
+ cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS)
return N;
const SITargetLowering& Lowering =
if (!N->readMem())
return false;
if (CbId == -1)
- return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
+ return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
- return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
+ return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
}
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
MemSDNode *Mem = cast<MemSDNode>(N);
unsigned AS = Mem->getAddressSpace();
- if (AS == AMDGPUAS::FLAT_ADDRESS) {
+ if (AS == AMDGPUASI.FLAT_ADDRESS) {
SelectCode(N);
return;
}
AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
const AMDGPUSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
+ AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
// Lower floating point store/load to integer store/load to reduce the number
// of patterns in tablegen.
setOperationAction(ISD::LOAD, MVT::f32, Promote);
GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = G->getGlobal();
- switch (G->getAddressSpace()) {
- case AMDGPUAS::LOCAL_ADDRESS: {
+ if (G->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) {
// XXX: What does the value of G->getOffset() mean?
assert(G->getOffset() == 0 &&
"Do not know what to do with an non-zero offset");
// TODO: We could emit code to handle the initialization somewhere.
- if (hasDefinedInitializer(GV))
- break;
-
- unsigned Offset = MFI->allocateLDSGlobal(DL, *GV);
- return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
- }
+ if (!hasDefinedInitializer(GV)) {
+ unsigned Offset = MFI->allocateLDSGlobal(DL, *GV);
+ return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
+ }
}
const Function &Fn = *DAG.getMachineFunction().getFunction();
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
+#include "AMDGPU.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
protected:
const AMDGPUSubtarget *Subtarget;
+ AMDGPUAS AMDGPUASI;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
/// type of implicit parameter.
uint32_t getImplicitParameterOffset(const AMDGPUMachineFunction *MFI,
const ImplicitParameter Param) const;
+
+ AMDGPUAS getAMDGPUAS() const {
+ return AMDGPUASI;
+ }
};
namespace AMDGPUISD {
void AMDGPUInstrInfo::anchor() {}
AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
- : AMDGPUGenInstrInfo(-1, -1), ST(ST) {}
+ : AMDGPUGenInstrInfo(-1, -1), ST(ST), AMDGPUASI(ST.getAMDGPUAS()) {}
// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
// the first 16 loads will be interleaved with the stores, and the next 16 will
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
+#include "AMDGPU.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
const AMDGPUSubtarget &ST;
virtual void anchor();
+protected:
+ AMDGPUAS AMDGPUASI;
public:
explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
AMDGPUInstructionSelector::AMDGPUInstructionSelector(
const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI)
: InstructionSelector(), TII(*STI.getInstrInfo()),
- TRI(*STI.getRegisterInfo()), RBI(RBI) {}
+ TRI(*STI.getRegisterInfo()), RBI(RBI), AMDGPUASI(STI.getAMDGPUAS()) {}
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
if (!I.hasOneMemOperand())
return false;
- if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS)
+ if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS)
return false;
if (!isInstrUniform(I))
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
+#include "AMDGPU.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
const AMDGPURegisterBankInfo &RBI);
bool select(MachineInstr &I) const override;
-
private:
struct GEPInfo {
const MachineInstr &GEP;
const SIInstrInfo &TII;
const SIRegisterInfo &TRI;
const AMDGPURegisterBankInfo &RBI;
+protected:
+ AMDGPUAS AMDGPUASI;
};
} // End llvm namespace.
//===----------------------------------------------------------------------===//
class PrivateMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS;
}]>;
class PrivateLoad <SDPatternOperator op> : PrivateMemOp <
def store_private : PrivateStore <store>;
class GlobalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
}]>;
// Global address space loads
class ConstantMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
}]>;
// Constant address space loads
def constant_load : ConstantLoad<load>;
class LocalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
// Local address space loads
>;
class FlatMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
- return cast<MemSDNode>(N)->getAddressSPace() == AMDGPUAS::FLAT_ADDRESS;
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.FLAT_ADDRESS;
}]>;
class FlatLoad <SDPatternOperator op> : FlatMemOp <
class local_binary_atomic_op<SDNode atomic_op> :
PatFrag<(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value), [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUstore_mskor node:$val, node:$ptr), [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
}]>;
multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {
(cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
AtomicSDNode *AN = cast<AtomicSDNode>(N);
return AN->getMemoryVT() == MVT::i32 &&
- AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
def _64_local : PatFrag<
(cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
AtomicSDNode *AN = cast<AtomicSDNode>(N);
return AN->getMemoryVT() == MVT::i64 &&
- AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
}
def "" : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;
def _noret : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
def _ret : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
}
defm atomic_swap_global : global_binary_atomic_op<atomic_swap>;
def AMDGPUatomic_cmp_swap_global : PatFrag<
(ops node:$ptr, node:$value),
(AMDGPUatomic_cmp_swap node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;
def atomic_cmp_swap_global : PatFrag<
(ops node:$ptr, node:$cmp, node:$value),
(atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;
def atomic_cmp_swap_global_noret : PatFrag<
(ops node:$ptr, node:$cmp, node:$value),
(atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
def atomic_cmp_swap_global_ret : PatFrag<
(ops node:$ptr, node:$cmp, node:$value),
(atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
Module *Mod = nullptr;
const DataLayout *DL = nullptr;
MDNode *MaxWorkGroupSizeRange = nullptr;
+ AMDGPUAS AS;
// FIXME: This should be per-kernel.
uint32_t LocalMemLimit = 0;
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
if (!ST.isPromoteAllocaEnabled())
return false;
+ AS = AMDGPU::getAMDGPUAS(*F.getParent());
FunctionType *FTy = F.getFunctionType();
// we cannot use local memory in the pass.
for (Type *ParamTy : FTy->params()) {
PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
- if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) {
LocalMemLimit = 0;
DEBUG(dbgs() << "Function has local memory argument. Promoting to "
"local memory disabled.\n");
// Check how much local memory is being used by global objects
CurrentLocalMemUsage = 0;
for (GlobalVariable &GV : Mod->globals()) {
- if (GV.getType()->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
+ if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS)
continue;
for (const User *U : GV.users()) {
Type *I32Ty = Type::getInt32Ty(Mod->getContext());
Value *CastDispatchPtr = Builder.CreateBitCast(
- DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS));
+ DispatchPtr, PointerType::get(I32Ty, AS.CONSTANT_ADDRESS));
// We could do a single 64-bit load here, but it's likely that the basic
// 32-bit and extract sequence is already present, and it is probably easier
}
}
-static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
+static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
ArrayType *AllocaTy = dyn_cast<ArrayType>(Alloca->getAllocatedType());
DEBUG(dbgs() << "Alloca candidate for vectorization\n");
IRBuilder<> Builder(Inst);
switch (Inst->getOpcode()) {
case Instruction::Load: {
- Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
+ Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
Value *Ptr = Inst->getOperand(0);
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
break;
}
case Instruction::Store: {
- Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
+ Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
Value *Ptr = Inst->getOperand(1);
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
DEBUG(dbgs() << "Trying to promote " << I << '\n');
- if (tryPromoteAllocaToVector(&I)) {
+ if (tryPromoteAllocaToVector(&I, AS)) {
DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");
return;
}
Twine(F->getName()) + Twine('.') + I.getName(),
nullptr,
GlobalVariable::NotThreadLocal,
- AMDGPUAS::LOCAL_ADDRESS);
+ AS.LOCAL_ADDRESS);
GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
GV->setAlignment(I.getAlignment());
if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
Value *Src0 = CI->getOperand(0);
Type *EltTy = Src0->getType()->getPointerElementType();
- PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
+ PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);
if (isa<ConstantPointerNull>(CI->getOperand(0)))
CI->setOperand(0, ConstantPointerNull::get(NewTy));
continue;
Type *EltTy = V->getType()->getPointerElementType();
- PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
+ PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);
// FIXME: It doesn't really make sense to try to do this for all
// instructions.
Type *SrcTy = Src->getType()->getPointerElementType();
Function *ObjectSize = Intrinsic::getDeclaration(Mod,
Intrinsic::objectsize,
- { Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) }
+ { Intr->getType(), PointerType::get(SrcTy, AS.LOCAL_ADDRESS) }
);
CallInst *NewCall = Builder.CreateCall(
FeatureDisable(false),
InstrItins(getInstrItineraryForCPU(GPU)) {
+ AS = AMDGPU::getAMDGPUAS(TT);
initializeSubtargetDependencies(TT, GPU, FS);
}
InstrItineraryData InstrItins;
SelectionDAGTargetInfo TSInfo;
+ AMDGPUAS AS;
public:
AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
return MaxPrivateElementSize;
}
+ AMDGPUAS getAMDGPUAS() const {
+ return AS;
+ }
+
bool has16BitInsts() const {
return Has16BitInsts;
}
: LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
FS, Options, getEffectiveRelocModel(RM), CM, OptLevel),
TLOF(createTLOF(getTargetTriple())) {
+ AS = AMDGPU::getAMDGPUAS(TT);
initAsmInfo();
}
TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
return new GCNPassConfig(this, PM);
}
+
protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
AMDGPUIntrinsicInfo IntrinsicInfo;
+ AMDGPUAS AS;
StringRef getGPUName(const Function &F) const;
StringRef getFeatureString(const Function &F) const;
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
+ AMDGPUAS getAMDGPUAS() const {
+ return AS;
+ }
void adjustPassManager(PassManagerBuilder &) override;
/// Get the integer value of a null pointer in the given address space.
uint64_t getNullPointerValue(unsigned AddrSpace) const {
- switch(AddrSpace) {
- case AMDGPUAS::LOCAL_ADDRESS:
- case AMDGPUAS::REGION_ADDRESS:
+ if (AddrSpace == AS.LOCAL_ADDRESS || AddrSpace == AS.REGION_ADDRESS)
return -1;
- default:
- return 0;
- }
+ return 0;
}
};
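Worked example of the convention above (a sketch): offset 0 is a valid local
or region (LDS/GDS) address, so null must be a value no real pointer can take.

  // Assuming a target machine TM and its address-space mapping AS:
  //   TM.getNullPointerValue(AS.LOCAL_ADDRESS)  == ~0ull
  //   TM.getNullPointerValue(AS.REGION_ADDRESS) == ~0ull
  //   TM.getNullPointerValue(AS.GLOBAL_ADDRESS) == 0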
//
//===----------------------------------------------------------------------===//
+#include "AMDGPUTargetMachine.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPU.h"
#include "llvm/MC/MCContext.h"
MCSection *AMDGPUTargetObjectFile::SelectSectionForGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
- if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GO) &&
+ auto AS = static_cast<const AMDGPUTargetMachine*>(&TM)->getAMDGPUAS();
+ if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GO, AS) &&
AMDGPU::shouldEmitConstantsToTextSection(TM.getTargetTriple()))
return TextSection;
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H
+#include "AMDGPU.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetMachine.h"
const DataLayout &DL = BB->getModule()->getDataLayout();
for (const Instruction &I : *BB) {
const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
- if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
+ if (!GEP || GEP->getAddressSpace() != ST->getAMDGPUAS().PRIVATE_ADDRESS)
continue;
const Value *Ptr = GEP->getPointerOperand();
}
unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
- switch (AddrSpace) {
- case AMDGPUAS::GLOBAL_ADDRESS:
- case AMDGPUAS::CONSTANT_ADDRESS:
- case AMDGPUAS::FLAT_ADDRESS:
+ AMDGPUAS AS = ST->getAMDGPUAS();
+ if (AddrSpace == AS.GLOBAL_ADDRESS ||
+ AddrSpace == AS.CONSTANT_ADDRESS ||
+ AddrSpace == AS.FLAT_ADDRESS)
return 128;
- case AMDGPUAS::LOCAL_ADDRESS:
- case AMDGPUAS::REGION_ADDRESS:
+ if (AddrSpace == AS.LOCAL_ADDRESS ||
+ AddrSpace == AS.REGION_ADDRESS)
return 64;
- case AMDGPUAS::PRIVATE_ADDRESS:
+ if (AddrSpace == AS.PRIVATE_ADDRESS)
return 8 * ST->getMaxPrivateElementSize();
- default:
- if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS &&
- (AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
- AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
- (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
- AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
- return 128;
- llvm_unreachable("unhandled address space");
- }
+
+ if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS &&
+ (AddrSpace == AS.PARAM_D_ADDRESS ||
+ AddrSpace == AS.PARAM_I_ADDRESS ||
+ (AddrSpace >= AS.CONSTANT_BUFFER_0 &&
+ AddrSpace <= AS.CONSTANT_BUFFER_15)))
+ return 128;
+ llvm_unreachable("unhandled address space");
}
bool AMDGPUTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
// We allow vectorization of flat stores, even though we may need to decompose
// them later if they may access private memory. We don't have enough context
// here, and legalization can handle it.
- if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) {
+ if (AddrSpace == ST->getAMDGPUAS().PRIVATE_ADDRESS) {
return (Alignment >= 4 || ST->hasUnalignedScratchAccess()) &&
ChainSizeInBytes <= ST->getMaxPrivateElementSize();
}
// All other loads are not divergent, because if threads issue loads with the
// same arguments, they will always get the same result.
if (const LoadInst *Load = dyn_cast<LoadInst>(V))
- return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
+ return Load->getPointerAddressSpace() == ST->getAMDGPUAS().PRIVATE_ADDRESS;
// Atomics are divergent because they are executed sequentially: when an
// atomic operation refers to the same address in each thread, then each
if (IsGraphicsShader)
return -1;
return ST->hasFlatAddressSpace() ?
- AMDGPUAS::FLAT_ADDRESS : AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
+ ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE;
}
unsigned getVectorSplitCost() { return 0; }
class MubufLoad <SDPatternOperator op> : PatFrag <
(ops node:$ptr), (op node:$ptr), [{
auto const AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUAS::GLOBAL_ADDRESS ||
- AS == AMDGPUAS::CONSTANT_ADDRESS;
+ return AS == AMDGPUASI.GLOBAL_ADDRESS ||
+ AS == AMDGPUASI.CONSTANT_ADDRESS;
}]>;
def mubuf_load : MubufLoad <load>;
class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.FLAT_ADDRESS;}]
>;
def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
class flat_ld <SDPatternOperator ld> : PatFrag<(ops node:$ptr),
(ld node:$ptr), [{
auto const AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUAS::FLAT_ADDRESS ||
- AS == AMDGPUAS::GLOBAL_ADDRESS ||
- AS == AMDGPUAS::CONSTANT_ADDRESS;
+ return AS == AMDGPUASI.FLAT_ADDRESS ||
+ AS == AMDGPUASI.GLOBAL_ADDRESS ||
+ AS == AMDGPUASI.CONSTANT_ADDRESS;
}]>;
class flat_st <SDPatternOperator st> : PatFrag<(ops node:$val, node:$ptr),
(st node:$val, node:$ptr), [{
auto const AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUAS::FLAT_ADDRESS ||
- AS == AMDGPUAS::GLOBAL_ADDRESS;
+ return AS == AMDGPUASI.FLAT_ADDRESS ||
+ AS == AMDGPUASI.GLOBAL_ADDRESS;
}]>;
def atomic_flat_load : flat_ld <atomic_load>;
AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer(
unsigned AddressSpace) const {
- switch (AddressSpace) {
- case AMDGPUAS::PRIVATE_ADDRESS:
+ if (AddressSpace == AMDGPUASI.PRIVATE_ADDRESS)
return AddressSpaceQualifier::Private;
- case AMDGPUAS::GLOBAL_ADDRESS:
+ if (AddressSpace == AMDGPUASI.GLOBAL_ADDRESS)
return AddressSpaceQualifier::Global;
- case AMDGPUAS::CONSTANT_ADDRESS:
+ if (AddressSpace == AMDGPUASI.CONSTANT_ADDRESS)
return AddressSpaceQualifier::Constant;
- case AMDGPUAS::LOCAL_ADDRESS:
+ if (AddressSpace == AMDGPUASI.LOCAL_ADDRESS)
return AddressSpaceQualifier::Local;
- case AMDGPUAS::FLAT_ADDRESS:
+ if (AddressSpace == AMDGPUASI.FLAT_ADDRESS)
return AddressSpaceQualifier::Generic;
- case AMDGPUAS::REGION_ADDRESS:
+ if (AddressSpace == AMDGPUASI.REGION_ADDRESS)
return AddressSpaceQualifier::Region;
- }
llvm_unreachable("Unknown address space qualifier");
}
"image3d_t", ValueKind::Image)
.Default(isa<PointerType>(Ty) ?
(Ty->getPointerAddressSpace() ==
- AMDGPUAS::LOCAL_ADDRESS ?
+ AMDGPUASI.LOCAL_ADDRESS ?
ValueKind::DynamicSharedPointer :
ValueKind::GlobalBuffer) :
ValueKind::ByValue);
return;
auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
- AMDGPUAS::GLOBAL_ADDRESS);
+ AMDGPUASI.GLOBAL_ADDRESS);
emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
}
if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
auto ElTy = PtrTy->getElementType();
- if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ElTy->isSized())
+ if (PtrTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS && ElTy->isSized())
Arg.mPointeeAlign = DL.getABITypeAlignment(ElTy);
}
}
void MetadataStreamer::begin(const Module &Mod) {
+ AMDGPUASI = getAMDGPUAS(Mod);
emitVersion();
emitPrintf(Mod);
}
#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
+#include "AMDGPU.h"
#include "AMDGPUCodeObjectMetadata.h"
#include "AMDKernelCodeT.h"
#include "llvm/ADT/StringRef.h"
class MetadataStreamer final {
private:
Metadata CodeObjectMetadata;
+ AMDGPUAS AMDGPUASI;
void dump(StringRef YamlString) const;
}
case Intrinsic::r600_implicitarg_ptr: {
- MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
+ MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUASI.PARAM_I_ADDRESS);
uint32_t ByteOffset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
return DAG.getConstant(ByteOffset, DL, PtrVT);
}
SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
- if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
+ if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS)
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
const DataLayout &DL = DAG.getDataLayout();
const GlobalValue *GV = GSD->getGlobal();
- MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
+ MVT ConstPtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);
SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
unsigned DwordOffset) const {
unsigned ByteOffset = DwordOffset * 4;
PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUAS::CONSTANT_BUFFER_0);
+ AMDGPUASI.CONSTANT_BUFFER_0);
// We shouldn't be using an offset wider than 16-bits for implicit parameters.
assert(isInt<16>(ByteOffset));
//TODO: Who creates the i8 stores?
assert(Store->isTruncatingStore()
|| Store->getValue().getValueType() == MVT::i8);
- assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
+ assert(Store->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS);
SDValue Mask;
if (Store->getMemoryVT() == MVT::i8) {
SDLoc DL(Op);
// Neither LOCAL nor PRIVATE can do vectors at the moment
- if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
+ if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS) &&
VT.isVector()) {
- if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) {
+ if ((AS == AMDGPUASI.PRIVATE_ADDRESS) &&
+ StoreNode->isTruncatingStore()) {
// Add an extra level of chain to isolate this vector
SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
// TODO: can the chain be replaced without creating a new store?
SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
DAG.getConstant(2, DL, PtrVT));
- if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
+ if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
// It is beneficial to create MSKOR here instead of combiner to avoid
// artificial dependencies introduced by RMW
if (StoreNode->isTruncatingStore()) {
}
// GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
- if (AS != AMDGPUAS::PRIVATE_ADDRESS)
+ if (AS != AMDGPUASI.PRIVATE_ADDRESS)
return SDValue();
if (MemVT.bitsLT(MVT::i32))
// Returns 512 + (kc_bank << 12), or -1 if the address space is not a
// constant buffer.
static int
-ConstantAddressBlock(unsigned AddressSpace) {
+ConstantAddressBlock(unsigned AddressSpace, AMDGPUAS AMDGPUASI) {
switch (AddressSpace) {
- case AMDGPUAS::CONSTANT_BUFFER_0:
+ case AMDGPUASI.CONSTANT_BUFFER_0:
return 512;
- case AMDGPUAS::CONSTANT_BUFFER_1:
+ case AMDGPUASI.CONSTANT_BUFFER_1:
return 512 + 4096;
- case AMDGPUAS::CONSTANT_BUFFER_2:
+ case AMDGPUASI.CONSTANT_BUFFER_2:
return 512 + 4096 * 2;
- case AMDGPUAS::CONSTANT_BUFFER_3:
+ case AMDGPUASI.CONSTANT_BUFFER_3:
return 512 + 4096 * 3;
- case AMDGPUAS::CONSTANT_BUFFER_4:
+ case AMDGPUASI.CONSTANT_BUFFER_4:
return 512 + 4096 * 4;
- case AMDGPUAS::CONSTANT_BUFFER_5:
+ case AMDGPUASI.CONSTANT_BUFFER_5:
return 512 + 4096 * 5;
- case AMDGPUAS::CONSTANT_BUFFER_6:
+ case AMDGPUASI.CONSTANT_BUFFER_6:
return 512 + 4096 * 6;
- case AMDGPUAS::CONSTANT_BUFFER_7:
+ case AMDGPUASI.CONSTANT_BUFFER_7:
return 512 + 4096 * 7;
- case AMDGPUAS::CONSTANT_BUFFER_8:
+ case AMDGPUASI.CONSTANT_BUFFER_8:
return 512 + 4096 * 8;
- case AMDGPUAS::CONSTANT_BUFFER_9:
+ case AMDGPUASI.CONSTANT_BUFFER_9:
return 512 + 4096 * 9;
- case AMDGPUAS::CONSTANT_BUFFER_10:
+ case AMDGPUASI.CONSTANT_BUFFER_10:
return 512 + 4096 * 10;
- case AMDGPUAS::CONSTANT_BUFFER_11:
+ case AMDGPUASI.CONSTANT_BUFFER_11:
return 512 + 4096 * 11;
- case AMDGPUAS::CONSTANT_BUFFER_12:
+ case AMDGPUASI.CONSTANT_BUFFER_12:
return 512 + 4096 * 12;
- case AMDGPUAS::CONSTANT_BUFFER_13:
+ case AMDGPUASI.CONSTANT_BUFFER_13:
return 512 + 4096 * 13;
- case AMDGPUAS::CONSTANT_BUFFER_14:
+ case AMDGPUASI.CONSTANT_BUFFER_14:
return 512 + 4096 * 14;
- case AMDGPUAS::CONSTANT_BUFFER_15:
+ case AMDGPUASI.CONSTANT_BUFFER_15:
return 512 + 4096 * 15;
default:
return -1;
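Because the CONSTANT_BUFFER_* values are guaranteed contiguous (see the
"do not re-order" comment in AMDGPU.h), the switch above is equivalent to a
single expression; a sketch:

  if (AddressSpace >= AMDGPUASI.CONSTANT_BUFFER_0 &&
      AddressSpace <= AMDGPUASI.CONSTANT_BUFFER_15)
    return 512 + 4096 * (AddressSpace - AMDGPUASI.CONSTANT_BUFFER_0);
  return -1;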
EVT MemVT = LoadNode->getMemoryVT();
ISD::LoadExtType ExtType = LoadNode->getExtensionType();
- if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
+ if (AS == AMDGPUASI.PRIVATE_ADDRESS &&
ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
return lowerPrivateExtLoad(Op, DAG);
}
SDValue Chain = LoadNode->getChain();
SDValue Ptr = LoadNode->getBasePtr();
- if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
- LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
+ if ((LoadNode->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ||
+ LoadNode->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS) &&
VT.isVector()) {
return scalarizeVectorLoad(LoadNode, DAG);
}
- int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
+ int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace(),
+ AMDGPUASI);
if (ConstantBlock > -1 &&
((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
(LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
DAG.getConstant(4, DL, MVT::i32)),
DAG.getConstant(LoadNode->getAddressSpace() -
- AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
+ AMDGPUASI.CONSTANT_BUFFER_0, DL, MVT::i32)
);
}
return DAG.getMergeValues(MergedValues, DL);
}
- if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+ if (LoadNode->getAddressSpace() != AMDGPUASI.PRIVATE_ADDRESS) {
return SDValue();
}
}
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUAS::CONSTANT_BUFFER_0);
+ AMDGPUASI.CONSTANT_BUFFER_0);
// i64 isn't a legal type, so the register type used ends up as i32, which
// isn't expected here. It attempts to create this sextload, but it ends up
class LoadParamFrag <PatFrag load_type> : PatFrag <
(ops node:$ptr), (load_type node:$ptr),
[{ return isConstantLoad(cast<LoadSDNode>(N), 0) ||
- (cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS); }]
+ (cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUASI.PARAM_I_ADDRESS); }]
>;
def vtx_id3_az_extloadi8 : LoadParamFrag<az_extloadi8>;
class LoadVtxId1 <PatFrag load> : PatFrag <
(ops node:$ptr), (load node:$ptr), [{
const MemSDNode *LD = cast<MemSDNode>(N);
- return LD->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
- (LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+ return LD->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
+ (LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
!isa<GlobalValue>(GetUnderlyingObject(
LD->getMemOperand()->getValue(), CurDAG->getDataLayout())));
}]>;
class LoadVtxId2 <PatFrag load> : PatFrag <
(ops node:$ptr), (load node:$ptr), [{
const MemSDNode *LD = cast<MemSDNode>(N);
- return LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+ return LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
isa<GlobalValue>(GetUnderlyingObject(
LD->getMemOperand()->getValue(), CurDAG->getDataLayout()));
}]>;
// Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was
// specified.
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ auto AMDGPUASI = ST.getAMDGPUAS();
if (ST.debuggerEmitPrologue())
emitDebuggerPrologue(MF, MBB);
PointerType *PtrTy =
PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()),
- AMDGPUAS::CONSTANT_ADDRESS);
+ AMDGPUASI.CONSTANT_ADDRESS);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
auto MMO = MF.getMachineMemOperand(PtrInfo,
MachineMemOperand::MOLoad |
if (AM.BaseGV)
return false;
- switch (AS) {
- case AMDGPUAS::GLOBAL_ADDRESS:
+ if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
// Assume the we will use FLAT for all global memory accesses
// on VI.
}
return isLegalMUBUFAddressingMode(AM);
-
- case AMDGPUAS::CONSTANT_ADDRESS:
+ } else if (AS == AMDGPUASI.CONSTANT_ADDRESS) {
// If the offset isn't a multiple of 4, it probably isn't going to be
// correctly aligned.
// FIXME: Can we get the real alignment here?
return false;
- case AMDGPUAS::PRIVATE_ADDRESS:
+ } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
return isLegalMUBUFAddressingMode(AM);
-
- case AMDGPUAS::LOCAL_ADDRESS:
- case AMDGPUAS::REGION_ADDRESS:
+ } else if (AS == AMDGPUASI.LOCAL_ADDRESS ||
+ AS == AMDGPUASI.REGION_ADDRESS) {
// Basic, single offset DS instructions allow a 16-bit unsigned immediate
// field.
// XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
return true;
return false;
-
- case AMDGPUAS::FLAT_ADDRESS:
- case AMDGPUAS::UNKNOWN_ADDRESS_SPACE:
+ } else if (AS == AMDGPUASI.FLAT_ADDRESS ||
+ AS == AMDGPUASI.UNKNOWN_ADDRESS_SPACE) {
// For an unknown address space, this usually means that this is for some
// reason being used for pure arithmetic, and not based on some addressing
// computation. We don't have instructions that compute pointers with any
// addressing modes, so treat them as having no offset like flat
// instructions.
return isLegalFlatAddressingMode(AM);
-
- default:
+ } else {
llvm_unreachable("unhandled address space");
}
}
return false;
}
- if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
- AddrSpace == AMDGPUAS::REGION_ADDRESS) {
+ if (AddrSpace == AMDGPUASI.LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUASI.REGION_ADDRESS) {
// ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
// aligned, 8 byte access in a single operation using ds_read2/write2_b32
// with adjacent offsets.
// will access scratch. If we had access to the IR function, then we
// could determine if any private memory was used in the function.
if (!Subtarget->hasUnalignedScratchAccess() &&
- (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
- AddrSpace == AMDGPUAS::FLAT_ADDRESS)) {
+ (AddrSpace == AMDGPUASI.PRIVATE_ADDRESS ||
+ AddrSpace == AMDGPUASI.FLAT_ADDRESS)) {
return false;
}
// If we have an uniform constant load, it still requires using a slow
// buffer instruction if unaligned.
if (IsFast) {
- *IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS) ?
+ *IsFast = (AddrSpace == AMDGPUASI.CONSTANT_ADDRESS) ?
(Align % 4 == 0) : true;
}
return MVT::Other;
}
-static bool isFlatGlobalAddrSpace(unsigned AS) {
- return AS == AMDGPUAS::GLOBAL_ADDRESS ||
- AS == AMDGPUAS::FLAT_ADDRESS ||
- AS == AMDGPUAS::CONSTANT_ADDRESS;
+static bool isFlatGlobalAddrSpace(unsigned AS, AMDGPUAS AMDGPUASI) {
+ return AS == AMDGPUASI.GLOBAL_ADDRESS ||
+ AS == AMDGPUASI.FLAT_ADDRESS ||
+ AS == AMDGPUASI.CONSTANT_ADDRESS;
}
bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
- return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS);
+ return isFlatGlobalAddrSpace(SrcAS, AMDGPUASI) &&
+ isFlatGlobalAddrSpace(DestAS, AMDGPUASI);
}
bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const {
unsigned DestAS) const {
// Flat -> private/local is a simple truncate.
  // Flat -> global is a no-op.
- if (SrcAS == AMDGPUAS::FLAT_ADDRESS)
+ if (SrcAS == AMDGPUASI.FLAT_ADDRESS)
return true;
return isNoopAddrSpaceCast(SrcAS, DestAS);
unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
- MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
+ MVT PtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);
SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
MRI.getLiveInVirtReg(InputPtrReg), PtrVT);
return DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
const ISD::InputArg *Arg) const {
const DataLayout &DL = DAG.getDataLayout();
Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
- PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
+ PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
unsigned Align = DL.getABITypeAlignment(Ty);
auto *ParamTy =
dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS &&
- ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ ParamTy && ParamTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) {
// On SI local pointers are just offsets into LDS, so they are always
// less than 16-bits. On CI and newer they could potentially be
// real pointers, so we can't guarantee their size.
bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
const Triple &TT = getTargetMachine().getTargetTriple();
- return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+ return GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
AMDGPU::shouldEmitConstantsToTextSection(TT);
}
bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
- return (GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
- GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) &&
+ return (GV->getType()->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
+ GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) &&
!shouldEmitFixup(GV) &&
!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
}
SelectionDAG &DAG) const {
if (Subtarget->hasApertureRegs()) { // Read from Aperture Registers directly.
- unsigned RegNo = (AS == AMDGPUAS::LOCAL_ADDRESS) ? AMDGPU::SRC_SHARED_BASE :
+ unsigned RegNo = (AS == AMDGPUASI.LOCAL_ADDRESS) ? AMDGPU::SRC_SHARED_BASE :
AMDGPU::SRC_PRIVATE_BASE;
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, RegNo, MVT::i32);
}
// Offset into amd_queue_t for group_segment_aperture_base_hi /
// private_segment_aperture_base_hi.
- uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
+ uint32_t StructOffset = (AS == AMDGPUASI.LOCAL_ADDRESS) ? 0x40 : 0x44;
SDValue Ptr = DAG.getNode(ISD::ADD, SL, MVT::i64, QueuePtr,
DAG.getConstant(StructOffset, SL, MVT::i64));
// TODO: We should use the value from the IR intrinsic call, but it might not
// be available and how do we get it?
Value *V = UndefValue::get(PointerType::get(Type::getInt8Ty(*DAG.getContext()),
- AMDGPUAS::CONSTANT_ADDRESS));
+ AMDGPUASI.CONSTANT_ADDRESS));
MachinePointerInfo PtrInfo(V, StructOffset);
return DAG.getLoad(MVT::i32, SL, QueuePtr.getValue(1), Ptr, PtrInfo,
static_cast<const AMDGPUTargetMachine &>(getTargetMachine());
// flat -> local/private
- if (ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
+ if (ASC->getSrcAddressSpace() == AMDGPUASI.FLAT_ADDRESS) {
unsigned DestAS = ASC->getDestAddressSpace();
- if (DestAS == AMDGPUAS::LOCAL_ADDRESS || DestAS == AMDGPUAS::PRIVATE_ADDRESS) {
+
+ if (DestAS == AMDGPUASI.LOCAL_ADDRESS ||
+ DestAS == AMDGPUASI.PRIVATE_ADDRESS) {
unsigned NullVal = TM.getNullPointerValue(DestAS);
SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
SDValue NonNull = DAG.getSetCC(SL, MVT::i1, Src, FlatNullPtr, ISD::SETNE);
}
// local/private -> flat
- if (ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
+ if (ASC->getDestAddressSpace() == AMDGPUASI.FLAT_ADDRESS) {
unsigned SrcAS = ASC->getSrcAddressSpace();
- if (SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS) {
+
+ if (SrcAS == AMDGPUASI.LOCAL_ADDRESS ||
+ SrcAS == AMDGPUASI.PRIVATE_ADDRESS) {
unsigned NullVal = TM.getNullPointerValue(SrcAS);
SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
bool
SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// We can fold offsets for anything that doesn't require a GOT relocation.
- return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
- GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) &&
+ return (GA->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
+ GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) &&
!shouldEmitGOTReloc(GA->getGlobal());
}
SelectionDAG &DAG) const {
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
- if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
- GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
+ if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS &&
+ GSD->getAddressSpace() != AMDGPUASI.GLOBAL_ADDRESS)
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
SDLoc DL(GSD);
SIInstrInfo::MO_GOTPCREL32);
Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext());
- PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
+ PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS);
const DataLayout &DataLayout = DAG.getDataLayout();
unsigned Align = DataLayout.getABITypeAlignment(PtrTy);
// FIXME: Use a PseudoSourceValue once those can be assigned an address space.
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  // If there is a possibility that flat instructions access scratch memory,
  // then we need to use the same legalization rules we use for private.
- if (AS == AMDGPUAS::FLAT_ADDRESS)
+ if (AS == AMDGPUASI.FLAT_ADDRESS)
AS = MFI->hasFlatScratchInit() ?
- AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
+ AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
unsigned NumElements = MemVT.getVectorNumElements();
- switch (AS) {
- case AMDGPUAS::CONSTANT_ADDRESS:
+ if (AS == AMDGPUASI.CONSTANT_ADDRESS) {
if (isMemOpUniform(Load))
return SDValue();
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
// loads.
//
- LLVM_FALLTHROUGH;
- case AMDGPUAS::GLOBAL_ADDRESS:
+ }
+ if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS) {
if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) &&
isMemOpHasNoClobberedMemOperand(Load))
return SDValue();
// have the same legalization requirements as global and private
// loads.
//
- LLVM_FALLTHROUGH;
- case AMDGPUAS::FLAT_ADDRESS:
+ }
+ if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS ||
+ AS == AMDGPUASI.FLAT_ADDRESS) {
if (NumElements > 4)
return SplitVectorLoad(Op, DAG);
// v4 loads are supported for private and global memory.
return SDValue();
- case AMDGPUAS::PRIVATE_ADDRESS:
+ }
+ if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
// Depending on the setting of the private_element_size field in the
// resource descriptor, we can only make private accesses up to a certain
// size.
default:
llvm_unreachable("unsupported private_element_size");
}
- case AMDGPUAS::LOCAL_ADDRESS:
+ } else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
if (NumElements > 2)
return SplitVectorLoad(Op, DAG);
// If properly aligned, if we split we might be able to use ds_read_b64.
return SplitVectorLoad(Op, DAG);
- default:
- return SDValue();
}
+ return SDValue();
}
SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  // If there is a possibility that flat instructions access scratch memory,
  // then we need to use the same legalization rules we use for private.
- if (AS == AMDGPUAS::FLAT_ADDRESS)
+ if (AS == AMDGPUASI.FLAT_ADDRESS)
AS = MFI->hasFlatScratchInit() ?
- AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
+ AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
unsigned NumElements = VT.getVectorNumElements();
- switch (AS) {
- case AMDGPUAS::GLOBAL_ADDRESS:
- case AMDGPUAS::FLAT_ADDRESS:
+ if (AS == AMDGPUASI.GLOBAL_ADDRESS ||
+ AS == AMDGPUASI.FLAT_ADDRESS) {
if (NumElements > 4)
return SplitVectorStore(Op, DAG);
return SDValue();
- case AMDGPUAS::PRIVATE_ADDRESS: {
+ } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
switch (Subtarget->getMaxPrivateElementSize()) {
case 4:
return scalarizeVectorStore(Store, DAG);
default:
llvm_unreachable("unsupported private_element_size");
}
- }
- case AMDGPUAS::LOCAL_ADDRESS: {
+ } else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
if (NumElements > 2)
return SplitVectorStore(Op, DAG);
// If properly aligned, splitting might let us use ds_write_b64.
return SplitVectorStore(Op, DAG);
- }
- default:
+ } else {
llvm_unreachable("unhandled address space");
}
}
unsigned AS = AtomicNode->getAddressSpace();
// No custom lowering required for local address space
- if (!isFlatGlobalAddrSpace(AS))
+ if (!isFlatGlobalAddrSpace(AS, AMDGPUASI))
return Op;
// Non-local address space requires custom lowering for atomic compare
// and swap; cmp and swap should be in the same register.
/// \brief Return true if the given offset \p OffsetSize in bytes can be
/// folded into the immediate offsets of a memory instruction for the given
/// address space \p AS.
static bool canFoldOffset(unsigned OffsetSize, unsigned AS,
const SISubtarget &STI) {
- switch (AS) {
- case AMDGPUAS::GLOBAL_ADDRESS:
+ auto AMDGPUASI = STI.getAMDGPUAS();
+ if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
// MUBUF instructions have a 12-bit offset in bytes.
return isUInt<12>(OffsetSize);
- case AMDGPUAS::CONSTANT_ADDRESS:
+ }
+ if (AS == AMDGPUASI.CONSTANT_ADDRESS) {
// SMRD instructions have an 8-bit offset in dwords on SI and
// a 20-bit offset in bytes on VI.
if (STI.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
return isUInt<20>(OffsetSize);
else
return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
- case AMDGPUAS::LOCAL_ADDRESS:
- case AMDGPUAS::REGION_ADDRESS:
+ }
+ if (AS == AMDGPUASI.LOCAL_ADDRESS ||
+ AS == AMDGPUASI.REGION_ADDRESS) {
// The single offset versions have a 16-bit offset in bytes.
return isUInt<16>(OffsetSize);
- case AMDGPUAS::PRIVATE_ADDRESS:
- // Indirect register addressing does not use any offsets.
- default:
- return false;
}
+ // Indirect register addressing does not use any offsets.
+ return false;
}
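A sketch of a call site for canFoldOffset (the offset value is made up for illustration; STI.getAMDGPUAS() is used the same way inside the function itself):

// Would a 16-byte immediate offset fold into a constant-address access?
bool Foldable =
    canFoldOffset(/*OffsetSize=*/16, STI.getAMDGPUAS().CONSTANT_ADDRESS, STI);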
// (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2)
// TODO: We could also do this for multiplies.
unsigned AS = N->getAddressSpace();
- if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUAS::PRIVATE_ADDRESS) {
+ if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUASI.PRIVATE_ADDRESS) {
SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), AS, DCI);
if (NewPtr) {
SmallVector<SDValue, 8> NewOps(N->op_begin(), N->op_end());
return AMDGPU::NoRegister;
assert(!MI.memoperands_empty() &&
- (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS);
+ (*MI.memoperands_begin())->getAddrSpace() == AMDGPUASI.PRIVATE_ADDRESS);
FrameIndex = Addr->getIndex();
return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
return true;
for (const MachineMemOperand *MMO : MI.memoperands()) {
- if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS)
+ if (MMO->getAddrSpace() == AMDGPUASI.FLAT_ADDRESS)
return true;
}
return false;
>;
def si_ld_local : PatFrag <(ops node:$ptr), (SIld_local node:$ptr), [{
- return cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ return cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
def si_load_local : PatFrag <(ops node:$ptr), (si_ld_local node:$ptr), [{
def si_st_local : PatFrag <
(ops node:$val, node:$ptr), (SIst_local node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ return cast<StoreSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
def si_store_local : PatFrag <
def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
auto Ld = cast<LoadSDNode>(N);
return Ld->getAlignment() >= 4 &&
- ((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+ ((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N)) ||
- (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
+ (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N) &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
}]>;
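Note on the .td predicates above: their C++ bodies are expanded into the generated SelectionDAG ISel class, so `AMDGPUASI` there resolves to a member of that class, assumed (as elsewhere in this patch) to be set up before selection:

// Sketch of the member the predicate code refers to:
//   AMDGPUAS AMDGPUASI;                       // member of the ISel class
//   AMDGPUASI = AMDGPU::getAMDGPUAS(TM);      // initialized from the triple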
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
ELF::SHF_AMDGPU_HSA_AGENT);
}
-bool isGroupSegment(const GlobalValue *GV) {
- return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
+ return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
}
-bool isGlobalSegment(const GlobalValue *GV) {
- return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) {
+ return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS;
}
-bool isReadOnlySegment(const GlobalValue *GV) {
- return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
+bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) {
+ return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS;
}
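Callers now have to thread the address-space table through explicitly. A sketch of a call site (the emit helper is hypothetical; the Module is reached via GlobalValue::getParent()):

AMDGPUAS AS = AMDGPU::getAMDGPUAS(*GV->getParent()); // per-module table
if (AMDGPU::isGroupSegment(GV, AS))
  emitToGroupSegment(GV); // hypothetical consumer of the classification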
bool shouldEmitConstantsToTextSection(const Triple &TT) {
return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) :
isUInt<20>(EncodedOffset);
}
-
} // end namespace AMDGPU
+
} // end namespace llvm
+
+const unsigned AMDGPUAS::MAX_COMMON_ADDRESS;
+const unsigned AMDGPUAS::GLOBAL_ADDRESS;
+const unsigned AMDGPUAS::LOCAL_ADDRESS;
+const unsigned AMDGPUAS::PARAM_D_ADDRESS;
+const unsigned AMDGPUAS::PARAM_I_ADDRESS;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_0;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_1;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_2;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_3;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_4;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_5;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_6;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_7;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_8;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_9;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_10;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_11;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_12;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_13;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_14;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_15;
+const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
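These out-of-line definitions are needed because, before C++17, odr-using a static const data member (for example, binding it to a reference) requires a namespace-scope definition even though the initializer appears in-class:

// Without the definitions above, this would fail to link pre-C++17:
const unsigned &Ref = AMDGPUAS::GLOBAL_ADDRESS;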
+
+namespace llvm {
+namespace AMDGPU {
+
+AMDGPUAS getAMDGPUAS(Triple T) {
+ auto Env = T.getEnvironmentName();
+ AMDGPUAS AS;
+ if (Env == "amdgiz" || Env == "amdgizcl") {
+ AS.FLAT_ADDRESS = 0;
+ AS.CONSTANT_ADDRESS = 4;
+ AS.PRIVATE_ADDRESS = 5;
+ AS.REGION_ADDRESS = 2;
+ } else {
+ AS.FLAT_ADDRESS = 4;
+ AS.CONSTANT_ADDRESS = 2;
+ AS.PRIVATE_ADDRESS = 0;
+ AS.REGION_ADDRESS = 5;
+ }
+ return AS;
+}
+
+AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
+ return getAMDGPUAS(M.getTargetTriple());
+}
+
+AMDGPUAS getAMDGPUAS(const Module &M) {
+ return getAMDGPUAS(Triple(M.getTargetTriple()));
+}
+} // namespace AMDGPU
+} // namespace llvm
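A usage sketch showing how the triple's environment component selects the layout (the triple strings are illustrative test values, not mandated spellings):

// "amdgiz" environment: flat becomes AS 0 and private becomes AS 5.
AMDGPUAS GizAS = AMDGPU::getAMDGPUAS(Triple("amdgcn--amdhsa-amdgiz"));
assert(GizAS.FLAT_ADDRESS == 0 && GizAS.PRIVATE_ADDRESS == 5);

// Default environment keeps the legacy numbering: private stays AS 0.
AMDGPUAS DefAS = AMDGPU::getAMDGPUAS(Triple("amdgcn--amdhsa"));
assert(DefAS.FLAT_ADDRESS == 4 && DefAS.PRIVATE_ADDRESS == 0);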
#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
+#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx);
-bool isGroupSegment(const GlobalValue *GV);
-bool isGlobalSegment(const GlobalValue *GV);
-bool isReadOnlySegment(const GlobalValue *GV);
+bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS);
+bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS);
+bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS);
/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.