AMDGPU: Cleanup subtarget features

author Matt Arsenault <Matthew.Arsenault@amd.com>

Mon, 7 Aug 2017 14:58:04 +0000 (14:58 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Mon, 7 Aug 2017 14:58:04 +0000 (14:58 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Mon, 7 Aug 2017 14:58:04 +0000 (14:58 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Mon, 7 Aug 2017 14:58:04 +0000 (14:58 +0000)
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td

index 6d0bb5e..959a43b 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -172,12 +172,6 @@ def FeatureGCN : SubtargetFeature<"gcn",
    "GCN or newer GPU"
  >;
  
-def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding",
-  "GCN1Encoding",
-  "true",
-  "Encoding format for SI and CI"
->;
-
  def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
    "GCN3Encoding",
    "true",
@@ -442,14 +436,14 @@ def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
  
  def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
    [FeatureFP64, FeatureLocalMemorySize32768,
-  FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding,
+  FeatureWavefrontSize64, FeatureGCN,
    FeatureLDSBankCount32, FeatureMovrel]
  >;
  
  def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
    [FeatureFP64, FeatureLocalMemorySize65536,
    FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
-  FeatureGCN1Encoding, FeatureCIInsts, FeatureMovrel]
+  FeatureCIInsts, FeatureMovrel]
  >;
  
  def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
@@ -667,7 +661,7 @@ def TruePredicate : Predicate<"true">;
  def isSICI : Predicate<
    "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
    "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
->, AssemblerPredicate<"FeatureGCN1Encoding">;
+>, AssemblerPredicate<"!FeatureGCN3Encoding">;
  
  def isVI : Predicate <
    "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

index b696118..deba76a 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -50,7 +50,7 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
  
    SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+dx10-clamp,+load-store-opt,");
    if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
-    FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
+    FullFS += "+flat-address-space,+flat-for-global,+unaligned-buffer-access,+trap-handler,";
  
    FullFS += FS;
  
@@ -75,6 +75,18 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
    if (MaxPrivateElementSize == 0)
      MaxPrivateElementSize = 4;
  
+  if (LDSBankCount == 0)
+    LDSBankCount = 32;
+
+  if (TT.getArch() == Triple::amdgcn) {
+    if (LocalMemorySize == 0)
+      LocalMemorySize = 32768;
+
+    // Do something sensible for unspecified target.
+    if (!HasMovrel && !HasVGPRIndexMode)
+      HasMovrel = true;
+  }
+
    return *this;
  }
  
@@ -117,7 +129,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
  
      FP64(false),
      IsGCN(false),
-    GCN1Encoding(false),
      GCN3Encoding(false),
      CIInsts(false),
      GFX9Insts(false),
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h

index 346788d..8aad97c 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -139,7 +139,6 @@ protected:
    // Subtarget statically properties set by tablegen
    bool FP64;
    bool IsGCN;
-  bool GCN1Encoding;
    bool GCN3Encoding;
    bool CIInsts;
    bool GFX9Insts;
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

index 7a8fd01..3ad6104 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -256,9 +256,8 @@ static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
    if (!GPU.empty())
      return GPU;
  
-  // HSA only supports CI+, so change the default GPU to a CI for HSA.
    if (TT.getArch() == Triple::amdgcn)
-    return (TT.getOS() == Triple::AMDHSA) ? "kaveri" : "tahiti";
+    return "generic";
  
    return "r600";
  }
diff --git a/lib/Target/AMDGPU/FLATInstructions.td b/lib/Target/AMDGPU/FLATInstructions.td

index df9fcff..a47bb52 100644 (file)
--- a/lib/Target/AMDGPU/FLATInstructions.td
+++ b/lib/Target/AMDGPU/FLATInstructions.td
@@ -25,8 +25,6 @@ class FLAT_Pseudo<string opName, dag outs, dag ins,
    let isPseudo = 1;
    let isCodeGenOnly = 1;
  
-  let SubtargetPredicate = isCIVI;
-
    let FLAT = 1;
  
    let UseNamedOperandTable = 1;
@@ -55,6 +53,9 @@ class FLAT_Pseudo<string opName, dag outs, dag ins,
    bits<1> has_glc  = 1;
    bits<1> glcValue = 0;
  
+  let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts,
+    !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));
+
    // TODO: M0 if it could possibly access LDS (before gfx9? only)?
    let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);
  
@@ -667,7 +668,7 @@ class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType v
    (inst $vaddr, $data, $offset, $slc)
  >;
  
-let Predicates = [isCIVI] in {
+let Predicates = [HasFlatAddressSpace] in {
  
  def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i32>;
  def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i32>;
@@ -719,13 +720,9 @@ def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global, i64>;
  def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>;
  def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>;
  
-} // End Predicates = [isCIVI]
-
-let Predicates = [isVI] in {
-  def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i16>;
-  def : FlatStorePat <FLAT_STORE_SHORT, flat_store, i16>;
-}
-
+def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i16>;
+def : FlatStorePat <FLAT_STORE_SHORT, flat_store, i16>;
+} // End Predicates = [HasFlatAddressSpace]
  
  let Predicates = [HasFlatGlobalInsts], AddedComplexity = 10 in {
  
diff --git a/lib/Target/AMDGPU/Processors.td b/lib/Target/AMDGPU/Processors.td

index d30d1d3..23bf664 100644 (file)
--- a/lib/Target/AMDGPU/Processors.td
+++ b/lib/Target/AMDGPU/Processors.td
@@ -10,6 +10,12 @@
  class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
  : Processor<Name, itin, Features>;
  
+// The code produced for "generic" is only useful for tests and cannot
+// reasonably be expected to execute on any particular target.
+def : ProcessorModel<"generic", NoSchedModel, [
+  FeatureGCN
+]>;
+
  //===----------------------------------------------------------------------===//
  // R600
  //===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

index f6d5853..03c9f7f 100644 (file)
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -531,6 +531,10 @@ bool isGFX9(const MCSubtargetInfo &STI) {
    return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
  }
  
+bool isGCN3Encoding(const MCSubtargetInfo &STI) {
+  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
+}
+
  bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
    const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
    const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
@@ -773,16 +777,15 @@ bool isUniformMMO(const MachineMemOperand *MMO) {
  }
  
  int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
-  if (isSI(ST) || isCI(ST))
-    return ByteOffset >> 2;
-
-  return ByteOffset;
+  if (isGCN3Encoding(ST))
+    return ByteOffset;
+  return ByteOffset >> 2;
  }
  
  bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
    int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
-  return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) :
-                                isUInt<20>(EncodedOffset);
+  return isGCN3Encoding(ST) ?
+    isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
  }
  } // end namespace AMDGPU
  
diff --git a/test/CodeGen/AMDGPU/addrspacecast.ll b/test/CodeGen/AMDGPU/addrspacecast.ll

index 93f6d88..901b197 100644 (file)
--- a/test/CodeGen/AMDGPU/addrspacecast.ll
+++ b/test/CodeGen/AMDGPU/addrspacecast.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=CI %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=CI %s
  ; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=GFX9 %s
  
  ; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast:
diff --git a/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll b/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll

index daa3442..90bc6a2 100644 (file)
--- a/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
+++ b/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs -march=amdgcn -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
-; RUN: llc -verify-machineinstrs -march=amdgcn -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tahiti -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
  
  declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
  declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
diff --git a/test/CodeGen/AMDGPU/bitcast-vector-extract.ll b/test/CodeGen/AMDGPU/bitcast-vector-extract.ll

index cf95f74..722099f 100644 (file)
--- a/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
+++ b/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
  ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
  
  ; The bitcast should be pushed through the bitcasts so the vectors can
diff --git a/test/CodeGen/AMDGPU/br_cc.f16.ll b/test/CodeGen/AMDGPU/br_cc.f16.ll

index b7a0c87..283384d 100644 (file)
--- a/test/CodeGen/AMDGPU/br_cc.f16.ll
+++ b/test/CodeGen/AMDGPU/br_cc.f16.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
  ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  
  ; GCN-LABEL: {{^}}br_cc_f16:
diff --git a/test/CodeGen/AMDGPU/branch-relaxation.ll b/test/CodeGen/AMDGPU/branch-relaxation.ll

index 233d5a5..661a6bd 100644 (file)
--- a/test/CodeGen/AMDGPU/branch-relaxation.ll
+++ b/test/CodeGen/AMDGPU/branch-relaxation.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s | FileCheck -check-prefix=GCN %s
  
  
  ; FIXME: We should use llvm-mc for this, but we can't even parse our own output.
diff --git a/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll b/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll

index 11e1a95..1518c0e 100644 (file)
--- a/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
+++ b/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
  ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
  
  ; GCN-LABEL: {{^}}use_dispatch_ptr:
diff --git a/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll

index 10ccce6..50da6b4 100644 (file)
--- a/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
+++ b/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
  
  ; GCN-LABEL: {{^}}use_workitem_id_x:
  ; GCN: s_waitcnt
diff --git a/test/CodeGen/AMDGPU/captured-frame-index.ll b/test/CodeGen/AMDGPU/captured-frame-index.ll

index 5fe1b27..99f6ec4 100644 (file)
--- a/test/CodeGen/AMDGPU/captured-frame-index.ll
+++ b/test/CodeGen/AMDGPU/captured-frame-index.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn-- -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
  
  ; GCN-LABEL: {{^}}store_fi_lifetime:
  ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}}
diff --git a/test/CodeGen/AMDGPU/collapse-endcf.ll b/test/CodeGen/AMDGPU/collapse-endcf.ll

index f4199d4..ec3bbc8 100644 (file)
--- a/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
  
  ; GCN-LABEL: {{^}}simple_nested_if:
  ; GCN:      s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
diff --git a/test/CodeGen/AMDGPU/ctpop.ll b/test/CodeGen/AMDGPU/ctpop.ll

index 68b39ba..4a6743e 100644 (file)
--- a/test/CodeGen/AMDGPU/ctpop.ll
+++ b/test/CodeGen/AMDGPU/ctpop.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s
  ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s
  ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
  
diff --git a/test/CodeGen/AMDGPU/ctpop64.ll b/test/CodeGen/AMDGPU/ctpop64.ll

index 4850370..8d2dac9 100644 (file)
--- a/test/CodeGen/AMDGPU/ctpop64.ll
+++ b/test/CodeGen/AMDGPU/ctpop64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
  ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
  
  declare i32 @llvm.r600.read.tidig.x() nounwind readnone
diff --git a/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll

index f839129..0600633 100644 (file)
--- a/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
+++ b/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
  ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  
  declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -12,7 +12,7 @@ declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
  ; GCN: buffer_store_dword [[CONV]],
  define amdgpu_kernel void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid 
+  %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid
    %load = load i8, i8 addrspace(1)* %gep, align 1
    %cvt = uitofp i8 %load to float
    store float %cvt, float addrspace(1)* %out, align 4
@@ -26,7 +26,7 @@ define amdgpu_kernel void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 a
  ; GCN: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
  define amdgpu_kernel void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr <2 x i8>, <2 x i8> addrspace(1)* %in, i32 %tid 
+  %gep = getelementptr <2 x i8>, <2 x i8> addrspace(1)* %in, i32 %tid
    %load = load <2 x i8>, <2 x i8> addrspace(1)* %gep, align 2
    %cvt = uitofp <2 x i8> %load to <2 x float>
    store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16
@@ -42,7 +42,7 @@ define amdgpu_kernel void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias
  ; GCN: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
  define amdgpu_kernel void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %in, i32 %tid 
+  %gep = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %in, i32 %tid
    %load = load <3 x i8>, <3 x i8> addrspace(1)* %gep, align 4
    %cvt = uitofp <3 x i8> %load to <3 x float>
    store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16
@@ -60,7 +60,7 @@ define amdgpu_kernel void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias
  ; GCN: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
  define amdgpu_kernel void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid 
+  %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid
    %load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 4
    %cvt = uitofp <4 x i8> %load to <4 x float>
    store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
@@ -86,7 +86,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias
  ; GCN: buffer_store_dwordx4
  define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid 
+  %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid
    %load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 1
    %cvt = uitofp <4 x i8> %load to <4 x float>
    store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
@@ -135,7 +135,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* n
  ; GCN: s_endpgm
  define amdgpu_kernel void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr <7 x i8>, <7 x i8> addrspace(1)* %in, i32 %tid 
+  %gep = getelementptr <7 x i8>, <7 x i8> addrspace(1)* %in, i32 %tid
    %load = load <7 x i8>, <7 x i8> addrspace(1)* %gep, align 1
    %cvt = uitofp <7 x i8> %load to <7 x float>
    store <7 x float> %cvt, <7 x float> addrspace(1)* %out, align 16
@@ -160,7 +160,7 @@ define amdgpu_kernel void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias
  ; GCN: buffer_store_dwordx4
  define amdgpu_kernel void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr <8 x i8>, <8 x i8> addrspace(1)* %in, i32 %tid 
+  %gep = getelementptr <8 x i8>, <8 x i8> addrspace(1)* %in, i32 %tid
    %load = load <8 x i8>, <8 x i8> addrspace(1)* %gep, align 8
    %cvt = uitofp <8 x i8> %load to <8 x float>
    store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16
@@ -174,7 +174,7 @@ define amdgpu_kernel void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias
  ; GCN: buffer_store_dword [[CONV]],
  define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid 
+  %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
    %load = load i32, i32 addrspace(1)* %gep, align 4
    %add = add i32 %load, 2
    %inreg = and i32 %add, 255
@@ -186,7 +186,7 @@ define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias
  ; GCN-LABEL: {{^}}i8_zext_inreg_hi1_to_f32:
  define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid 
+  %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
    %load = load i32, i32 addrspace(1)* %gep, align 4
    %inreg = and i32 %load, 65280
    %shr = lshr i32 %inreg, 8
@@ -200,7 +200,7 @@ define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias
  ; GCN-LABEL: {{^}}i8_zext_i32_to_f32:
  define amdgpu_kernel void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid 
+  %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid
    %load = load i8, i8 addrspace(1)* %gep, align 1
    %ext = zext i8 %load to i32
    %cvt = uitofp i32 %ext to float
@@ -211,7 +211,7 @@ define amdgpu_kernel void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out,
  ; GCN-LABEL: {{^}}v4i8_zext_v4i32_to_v4f32:
  define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid 
+  %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid
    %load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 1
    %ext = zext <4 x i8> %load to <4 x i32>
    %cvt = uitofp <4 x i32> %ext to <4 x float>
@@ -226,7 +226,7 @@ define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* no
  ; GCN: buffer_store_dword [[CONV]]
  define amdgpu_kernel void @extract_byte0_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid 
+  %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
    %val = load i32, i32 addrspace(1)* %gep
    %and = and i32 %val, 255
    %cvt = uitofp i32 %and to float
@@ -241,7 +241,7 @@ define amdgpu_kernel void @extract_byte0_to_f32(float addrspace(1)* noalias %out
  ; GCN: buffer_store_dword [[CONV]]
  define amdgpu_kernel void @extract_byte1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid 
+  %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
    %val = load i32, i32 addrspace(1)* %gep
    %srl = lshr i32 %val, 8
    %and = and i32 %srl, 255
@@ -257,7 +257,7 @@ define amdgpu_kernel void @extract_byte1_to_f32(float addrspace(1)* noalias %out
  ; GCN: buffer_store_dword [[CONV]]
  define amdgpu_kernel void @extract_byte2_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid 
+  %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
    %val = load i32, i32 addrspace(1)* %gep
    %srl = lshr i32 %val, 16
    %and = and i32 %srl, 255
@@ -273,7 +273,7 @@ define amdgpu_kernel void @extract_byte2_to_f32(float addrspace(1)* noalias %out
  ; GCN: buffer_store_dword [[CONV]]
  define amdgpu_kernel void @extract_byte3_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid 
+  %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
    %val = load i32, i32 addrspace(1)* %gep
    %srl = lshr i32 %val, 24
    %and = and i32 %srl, 255
diff --git a/test/CodeGen/AMDGPU/fabs.f16.ll b/test/CodeGen/AMDGPU/fabs.f16.ll

index b90290e..9da2479 100644 (file)
--- a/test/CodeGen/AMDGPU/fabs.f16.ll
+++ b/test/CodeGen/AMDGPU/fabs.f16.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
  ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
  
diff --git a/test/CodeGen/AMDGPU/fadd.f16.ll b/test/CodeGen/AMDGPU/fadd.f16.ll

index 88b3be0..d32b620 100644 (file)
--- a/test/CodeGen/AMDGPU/fadd.f16.ll
+++ b/test/CodeGen/AMDGPU/fadd.f16.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
  ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  
  ; GCN-LABEL: {{^}}fadd_f16
diff --git a/test/CodeGen/AMDGPU/fcopysign.f16.ll b/test/CodeGen/AMDGPU/fcopysign.f16.ll

index 335c680..2f24f5f 100644 (file)
--- a/test/CodeGen/AMDGPU/fcopysign.f16.ll
+++ b/test/CodeGen/AMDGPU/fcopysign.f16.ll
@@ -1,6 +1,6 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX8 %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX8 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s
  
  declare half @llvm.copysign.f16(half, half)
  declare float @llvm.copysign.f32(float, float)
diff --git a/test/CodeGen/AMDGPU/fence-amdgiz.ll b/test/CodeGen/AMDGPU/fence-amdgiz.ll

index 98e9d24..3055f32 100644 (file)
--- a/test/CodeGen/AMDGPU/fence-amdgiz.ll
+++ b/test/CodeGen/AMDGPU/fence-amdgiz.ll
@@ -1,7 +1,6 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgizcl -mcpu=kaveri < %s | FileCheck %s
  
  target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
-target triple = "amdgcn-amd-amdhsa-amdgizcl"
  
  ; CHECK-LABEL: atomic_fence
  ; CHECK:       BB#0:
diff --git a/test/CodeGen/AMDGPU/fmax3.f64.ll b/test/CodeGen/AMDGPU/fmax3.f64.ll

index 8b9104b..a56a586 100644 (file)
--- a/test/CodeGen/AMDGPU/fmax3.f64.ll
+++ b/test/CodeGen/AMDGPU/fmax3.f64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
  ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
  
  declare double @llvm.maxnum.f64(double, double) nounwind readnone
diff --git a/test/CodeGen/AMDGPU/fmul.f16.ll b/test/CodeGen/AMDGPU/fmul.f16.ll

index 5f120f6..fb8aeb8 100644 (file)
--- a/test/CodeGen/AMDGPU/fmul.f16.ll
+++ b/test/CodeGen/AMDGPU/fmul.f16.ll
@@ -1,5 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  
  ; GCN-LABEL: {{^}}fmul_f16
  ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
diff --git a/test/CodeGen/AMDGPU/fpext.f16.ll b/test/CodeGen/AMDGPU/fpext.f16.ll

index ec19fd1..ae82973 100644 (file)
--- a/test/CodeGen/AMDGPU/fpext.f16.ll
+++ b/test/CodeGen/AMDGPU/fpext.f16.ll
@@ -1,6 +1,6 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
  
  ; GCN-LABEL: {{^}}fpext_f16_to_f32
  ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
diff --git a/test/CodeGen/AMDGPU/fptosi.f16.ll b/test/CodeGen/AMDGPU/fptosi.f16.ll

index f593030..57d8c62 100644 (file)
--- a/test/CodeGen/AMDGPU/fptosi.f16.ll
+++ b/test/CodeGen/AMDGPU/fptosi.f16.ll
@@ -57,8 +57,8 @@ entry:
  ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
  ; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
  ; SI: v_cvt_i32_f32_e32 v[[R_I16_0:[0-9]+]], v[[A_F32_0]]
-; SI: v_cvt_i32_f32_e32 v[[R_I16_1:[0-9]+]], v[[A_F32_1]]
-; SI: v_and_b32_e32 v[[R_I16_LO:[0-9]+]], 0xffff, v[[R_I16_0]]
+; SI-DAG: v_cvt_i32_f32_e32 v[[R_I16_1:[0-9]+]], v[[A_F32_1]]
+; SI-DAG: v_and_b32_e32 v[[R_I16_LO:[0-9]+]], 0xffff, v[[R_I16_0]]
  ; SI: v_lshlrev_b32_e32 v[[R_I16_HI:[0-9]+]], 16, v[[R_I16_1]]
  ; SI: v_or_b32_e32 v[[R_V2_I16:[0-9]+]], v[[R_I16_LO]], v[[R_I16_HI]]
  
diff --git a/test/CodeGen/AMDGPU/fptoui.f16.ll b/test/CodeGen/AMDGPU/fptoui.f16.ll

index cebe330..e2c8aac 100644 (file)
--- a/test/CodeGen/AMDGPU/fptoui.f16.ll
+++ b/test/CodeGen/AMDGPU/fptoui.f16.ll
@@ -1,5 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  
  ; GCN-LABEL: {{^}}fptoui_f16_to_i16
  ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
diff --git a/test/CodeGen/AMDGPU/frame-index-amdgiz.ll b/test/CodeGen/AMDGPU/frame-index-amdgiz.ll

index dd46403..47716a4 100644 (file)
--- a/test/CodeGen/AMDGPU/frame-index-amdgiz.ll
+++ b/test/CodeGen/AMDGPU/frame-index-amdgiz.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn---amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck %s
  ;
  ; The original OpenCL kernel:
  ; kernel void f(global int *a, int i,  int j) {
@@ -6,10 +6,9 @@
  ;  x[i] = 7;
  ;  a[0] = x[j];
  ; }
-; clang -cc1 -triple amdgcn---amdgizcl -emit-llvm -o - 
+; clang -cc1 -triple amdgcn---amdgizcl -emit-llvm -o -
  
  target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
-target triple = "amdgcn---amdgiz"
  
  define amdgpu_kernel void @f(i32 addrspace(1)* nocapture %a, i32 %i, i32 %j) local_unnamed_addr #0 {
  entry:
diff --git a/test/CodeGen/AMDGPU/frame-index-elimination.ll b/test/CodeGen/AMDGPU/frame-index-elimination.ll

index 61b0982..b98428a 100644 (file)
--- a/test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ b/test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
  
  ; Test that non-entry function frame indices are expanded properly to
  ; give an index relative to the scratch wave offset register
diff --git a/test/CodeGen/AMDGPU/fsub.f16.ll b/test/CodeGen/AMDGPU/fsub.f16.ll

index 15a4ce2..5e3668a 100644 (file)
--- a/test/CodeGen/AMDGPU/fsub.f16.ll
+++ b/test/CodeGen/AMDGPU/fsub.f16.ll
@@ -1,6 +1,6 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=VI -check-prefix=SIVI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=VI -check-prefix=SIVI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s
  
  ; GCN-LABEL: {{^}}fsub_f16:
  ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
diff --git a/test/CodeGen/AMDGPU/half.ll b/test/CodeGen/AMDGPU/half.ll

index 43745d4..0df5851 100644 (file)
--- a/test/CodeGen/AMDGPU/half.ll
+++ b/test/CodeGen/AMDGPU/half.ll
@@ -1,5 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  
  ; half args should be promoted to float for SI and lower.
  
diff --git a/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll b/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll

index 8e207a3..b287340 100644 (file)
--- a/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll
+++ b/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s
  
  ; FIXME: Merge into indirect-addressing-si.ll
  
diff --git a/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/test/CodeGen/AMDGPU/indirect-addressing-si.ll

index 799be7f..906a1f1 100644 (file)
--- a/test/CodeGen/AMDGPU/indirect-addressing-si.ll
+++ b/test/CodeGen/AMDGPU/indirect-addressing-si.ll
@@ -1,7 +1,7 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-vgpr-index-mode -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=IDXMODE %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=IDXMODE %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-vgpr-index-mode -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=IDXMODE %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=IDXMODE %s
  
  ; Tests for indirect addressing on SI, which is implemented using dynamic
  ; indexing of vectors.
diff --git a/test/CodeGen/AMDGPU/insert_vector_elt.ll b/test/CodeGen/AMDGPU/insert_vector_elt.ll

index 6391b6b..463e7ba 100644 (file)
--- a/test/CodeGen/AMDGPU/insert_vector_elt.ll
+++ b/test/CodeGen/AMDGPU/insert_vector_elt.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -march=amdgcn -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=GCN-NO-TONGA %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tahiti -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=GCN-NO-TONGA %s
  ; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=GCN-TONGA %s
  
  ; FIXME: Broken on evergreen
diff --git a/test/CodeGen/AMDGPU/llvm.ceil.f16.ll b/test/CodeGen/AMDGPU/llvm.ceil.f16.ll

index 071f2a6..0f3cf79 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.ceil.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.ceil.f16.ll
@@ -1,5 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  
  declare half @llvm.ceil.f16(half %a)
  declare <2 x half> @llvm.ceil.v2f16(<2 x half> %a)
diff --git a/test/CodeGen/AMDGPU/llvm.cos.f16.ll b/test/CodeGen/AMDGPU/llvm.cos.f16.ll

index 8931de6..16f167a 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.cos.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.cos.f16.ll
@@ -1,5 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  
  declare half @llvm.cos.f16(half %a)
  declare <2 x half> @llvm.cos.v2f16(<2 x half> %a)
diff --git a/test/CodeGen/AMDGPU/llvm.exp2.f16.ll b/test/CodeGen/AMDGPU/llvm.exp2.f16.ll

index 4e96a76..5764224 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.exp2.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.exp2.f16.ll
@@ -1,5 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  
  declare half @llvm.exp2.f16(half %a)
  declare <2 x half> @llvm.exp2.v2f16(<2 x half> %a)
diff --git a/test/CodeGen/AMDGPU/llvm.floor.f16.ll b/test/CodeGen/AMDGPU/llvm.floor.f16.ll

index 74d1e69..907fd48 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.floor.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.floor.f16.ll
@@ -1,5 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  
  declare half @llvm.floor.f16(half %a)
  declare <2 x half> @llvm.floor.v2f16(<2 x half> %a)
diff --git a/test/CodeGen/AMDGPU/llvm.fma.f16.ll b/test/CodeGen/AMDGPU/llvm.fma.f16.ll

index a379b18..738f429 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.fma.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.fma.f16.ll
@@ -1,5 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  
  declare half @llvm.fma.f16(half %a, half %b, half %c)
  declare <2 x half> @llvm.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
diff --git a/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll b/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll

index 2d4fe08..add22db 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
@@ -1,7 +1,7 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
  
  declare half @llvm.fmuladd.f16(half %a, half %b, half %c)
  declare <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
diff --git a/test/CodeGen/AMDGPU/llvm.log2.f16.ll b/test/CodeGen/AMDGPU/llvm.log2.f16.ll

index 277195c..7228c40 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.log2.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.log2.f16.ll
@@ -1,5 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  
  declare half @llvm.log2.f16(half %a)
  declare <2 x half> @llvm.log2.v2f16(<2 x half> %a)
diff --git a/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll b/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll

index c727164..59a4f61 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
@@ -1,5 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  
  declare half @llvm.maxnum.f16(half %a, half %b)
  declare <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b)
diff --git a/test/CodeGen/AMDGPU/llvm.memcpy.ll b/test/CodeGen/AMDGPU/llvm.memcpy.ll

index 7b4db55..4068c02 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.memcpy.ll
+++ b/test/CodeGen/AMDGPU/llvm.memcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
  ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
  
  declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/AMDGPU/llvm.minnum.f16.ll b/test/CodeGen/AMDGPU/llvm.minnum.f16.ll

index 0e93acc..5fcbd1e 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
@@ -1,5 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  
  declare half @llvm.minnum.f16(half %a, half %b)
  declare <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b)
diff --git a/test/CodeGen/AMDGPU/llvm.rint.f16.ll b/test/CodeGen/AMDGPU/llvm.rint.f16.ll

index 9228208..0e60f27 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.rint.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.rint.f16.ll
@@ -1,6 +1,6 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SIVI -check-prefix=VI -check-prefix=GFX89 %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SIVI -check-prefix=VI -check-prefix=GFX89 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s
  
  declare half @llvm.rint.f16(half %a)
  declare <2 x half> @llvm.rint.v2f16(<2 x half> %a)
diff --git a/test/CodeGen/AMDGPU/llvm.round.ll b/test/CodeGen/AMDGPU/llvm.round.ll

index 7e29147..48f1efb 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.round.ll
+++ b/test/CodeGen/AMDGPU/llvm.round.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
  ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=FUNC %s
  ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 -check-prefix=FUNC %s
  ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
diff --git a/test/CodeGen/AMDGPU/llvm.sin.f16.ll b/test/CodeGen/AMDGPU/llvm.sin.f16.ll

index 08b9d9d..839ff06 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.sin.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.sin.f16.ll
@@ -1,5 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  
  declare half @llvm.sin.f16(half %a)
  declare <2 x half> @llvm.sin.v2f16(<2 x half> %a)
diff --git a/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll b/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll

index 0e1358e..48c8251 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll
@@ -1,5 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  
  declare half @llvm.sqrt.f16(half %a)
  declare <2 x half> @llvm.sqrt.v2f16(<2 x half> %a)
diff --git a/test/CodeGen/AMDGPU/llvm.trunc.f16.ll b/test/CodeGen/AMDGPU/llvm.trunc.f16.ll

index 37ee4e9..a8ff7cb 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.trunc.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.trunc.f16.ll
@@ -1,5 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  
  declare half @llvm.trunc.f16(half %a)
  declare <2 x half> @llvm.trunc.v2f16(<2 x half> %a)
diff --git a/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir b/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir

index 768acf3..2a431fe 100644 (file)
--- a/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir
+++ b/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir
@@ -1,4 +1,4 @@
-# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass machine-scheduler -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -run-pass machine-scheduler -o - %s | FileCheck -check-prefix=GCN %s
  
  # GCN-LABEL: name: cluster_add_addc
  # GCN: S_NOP 0, implicit-def %vcc
diff --git a/test/CodeGen/AMDGPU/merge-stores.ll b/test/CodeGen/AMDGPU/merge-stores.ll

index 59aa89f..e26276b 100644 (file)
--- a/test/CodeGen/AMDGPU/merge-stores.ll
+++ b/test/CodeGen/AMDGPU/merge-stores.ll
@@ -1,5 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=bonaire -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
  
  ; This test is mostly to test DAG store merging, so disable the vectorizer.
  ; Run with devices with different unaligned load restrictions.
diff --git a/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll b/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll

index 7a4a451..7343dd6 100644 (file)
--- a/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll
+++ b/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-promote-alloca < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s
  
  declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i32, i1) #0
  declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i32, i1) #0
diff --git a/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll b/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll

index 9f22f20..653e259 100644 (file)
--- a/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll
+++ b/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll
@@ -1,5 +1,5 @@
-; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mattr=+promote-alloca < %s | FileCheck -check-prefix=NOOPTS -check-prefix=ALL %s
-; RUN: llc -O1 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mattr=+promote-alloca < %s | FileCheck -check-prefix=OPTS -check-prefix=ALL %s
+; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -mattr=+promote-alloca < %s | FileCheck -check-prefix=NOOPTS -check-prefix=ALL %s
+; RUN: llc -O1 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -mattr=+promote-alloca < %s | FileCheck -check-prefix=OPTS -check-prefix=ALL %s
  
  ; ALL-LABEL: {{^}}promote_alloca_i32_array_array:
  ; NOOPTS: workgroup_group_segment_byte_size = 0{{$}}
diff --git a/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll b/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll

index bf3bc49..137b41d 100644 (file)
--- a/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll
+++ b/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefix=GCN %s
  
  ; This shows that the amount of LDS estimate is sensitive to the order
  ; of the LDS globals.
diff --git a/test/CodeGen/AMDGPU/s_addk_i32.ll b/test/CodeGen/AMDGPU/s_addk_i32.ll

index deef24c..ac7a33a 100644 (file)
--- a/test/CodeGen/AMDGPU/s_addk_i32.ll
+++ b/test/CodeGen/AMDGPU/s_addk_i32.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
  ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
  
  ; SI-LABEL: {{^}}s_addk_i32_k0:
diff --git a/test/CodeGen/AMDGPU/s_mulk_i32.ll b/test/CodeGen/AMDGPU/s_mulk_i32.ll

index f6ed540..78f5e8a 100644 (file)
--- a/test/CodeGen/AMDGPU/s_mulk_i32.ll
+++ b/test/CodeGen/AMDGPU/s_mulk_i32.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
  ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
  
  ; SI-LABEL: {{^}}s_mulk_i32_k0:
diff --git a/test/CodeGen/AMDGPU/sad.ll b/test/CodeGen/AMDGPU/sad.ll

index ee56e90..5d981a2 100644 (file)
--- a/test/CodeGen/AMDGPU/sad.ll
+++ b/test/CodeGen/AMDGPU/sad.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
  
  ; GCN-LABEL: {{^}}v_sad_u32_pat1:
  ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
diff --git a/test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll b/test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll

index 5c47c16..fd34978 100644 (file)
--- a/test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll
+++ b/test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI -check-prefix=GCN %s
  ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=VI -check-prefix=GCN %s
  
  ; FUNC-LABEL: {{^}}cluster_arg_loads:
diff --git a/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll b/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll

index 0d19c1e..cae713b 100644 (file)
--- a/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll
+++ b/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -misched=gcn-minreg -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -misched=gcn-max-occupancy-experimental -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -misched=gcn-minreg -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -misched=gcn-max-occupancy-experimental -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
  ; RUN: llc -march=amdgcn -mcpu=fiji -misched=gcn-minreg -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s
  ; RUN: llc -march=amdgcn -mcpu=fiji -misched=gcn-max-occupancy-experimental -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s
  
diff --git a/test/CodeGen/AMDGPU/select-vectors.ll b/test/CodeGen/AMDGPU/select-vectors.ll

index b77ebcf..4f47bff 100644 (file)
--- a/test/CodeGen/AMDGPU/select-vectors.ll
+++ b/test/CodeGen/AMDGPU/select-vectors.ll
@@ -1,6 +1,6 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -verify-machineinstrs -march=amdgcn < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -verify-machineinstrs -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
  
  ; Test expansion of scalar selects on vectors.
  ; Evergreen not enabled since it seems to be having problems with doubles.
diff --git a/test/CodeGen/AMDGPU/select.f16.ll b/test/CodeGen/AMDGPU/select.f16.ll

index e79ce3a..ce024ed 100644 (file)
--- a/test/CodeGen/AMDGPU/select.f16.ll
+++ b/test/CodeGen/AMDGPU/select.f16.ll
@@ -1,5 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  
  ; GCN-LABEL: {{^}}select_f16:
  ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
diff --git a/test/CodeGen/AMDGPU/setcc-fneg-constant.ll b/test/CodeGen/AMDGPU/setcc-fneg-constant.ll

index bcaa1aa..22827b2 100644 (file)
--- a/test/CodeGen/AMDGPU/setcc-fneg-constant.ll
+++ b/test/CodeGen/AMDGPU/setcc-fneg-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
  
  ; Test fcmp pred (fneg x), c -> fcmp (swapped pred) x, -c combine.
  
diff --git a/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll b/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll

index 2f9eed4..72767d2 100644 (file)
--- a/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
+++ b/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
@@ -1,4 +1,4 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
  
  ; Extract the high bit of the 1st quarter
  ; GCN-LABEL: {{^}}v_uextract_bit_31_i128:
diff --git a/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll b/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll

index 670287b..dae22ec 100644 (file)
--- a/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
+++ b/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
@@ -1,4 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; FIXME: Fails with -enable-var-scope
  
  ; Make sure 64-bit BFE pattern does a 32-bit BFE on the relevant half.
  
diff --git a/test/CodeGen/AMDGPU/shl_add_constant.ll b/test/CodeGen/AMDGPU/shl_add_constant.ll

index 9da4bc0..26cdae3 100644 (file)
--- a/test/CodeGen/AMDGPU/shl_add_constant.ll
+++ b/test/CodeGen/AMDGPU/shl_add_constant.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
  
  declare i32 @llvm.amdgcn.workitem.id.x() #1
  
@@ -56,7 +56,7 @@ define amdgpu_kernel void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspa
  ; FUNC-LABEL: {{^}}test_add_shl_add_constant:
  ; SI-DAG: s_load_dword [[X:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
  ; SI-DAG: s_load_dword [[Y:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: s_lshl_b32 [[SHL3:s[0-9]+]], [[X]], 3
+; SI-DAG: s_lshl_b32 [[SHL3:s[0-9]+]], [[X]], 3
  ; SI: s_add_i32 [[RESULT:s[0-9]+]], [[SHL3]], [[Y]]
  ; SI: s_addk_i32 [[RESULT]], 0x3d8
  ; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
diff --git a/test/CodeGen/AMDGPU/sitofp.f16.ll b/test/CodeGen/AMDGPU/sitofp.f16.ll

index 0bcef99..31d3466 100644 (file)
--- a/test/CodeGen/AMDGPU/sitofp.f16.ll
+++ b/test/CodeGen/AMDGPU/sitofp.f16.ll
@@ -1,5 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  
  ; GCN-LABEL: {{^}}sitofp_i16_to_f16
  ; GCN: buffer_load_{{sshort|ushort}} v[[A_I16:[0-9]+]]
diff --git a/test/CodeGen/AMDGPU/trunc.ll b/test/CodeGen/AMDGPU/trunc.ll

index da038f4..b390958 100644 (file)
--- a/test/CodeGen/AMDGPU/trunc.ll
+++ b/test/CodeGen/AMDGPU/trunc.ll
@@ -1,6 +1,6 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -verify-machineinstrs< %s | FileCheck -check-prefix=GCN -check-prefix=VI  %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs< %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -verify-machineinstrs< %s | FileCheck -check-prefix=GCN -check-prefix=VI  %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
  
  declare i32 @llvm.r600.read.tidig.x() nounwind readnone
  
diff --git a/test/CodeGen/AMDGPU/udivrem.ll b/test/CodeGen/AMDGPU/udivrem.ll

index 9507a49..b89f326 100644 (file)
--- a/test/CodeGen/AMDGPU/udivrem.ll
+++ b/test/CodeGen/AMDGPU/udivrem.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
  ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
  ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
  
diff --git a/test/CodeGen/AMDGPU/uitofp.f16.ll b/test/CodeGen/AMDGPU/uitofp.f16.ll

index eaa1d07..0e7250f 100644 (file)
--- a/test/CodeGen/AMDGPU/uitofp.f16.ll
+++ b/test/CodeGen/AMDGPU/uitofp.f16.ll
@@ -1,5 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  
  ; GCN-LABEL: {{^}}uitofp_i16_to_f16
  ; GCN: buffer_load_ushort v[[A_I16:[0-9]+]]
diff --git a/test/CodeGen/AMDGPU/v_mac_f16.ll b/test/CodeGen/AMDGPU/v_mac_f16.ll

index 46c9b7e..fe5e5a1 100644 (file)
--- a/test/CodeGen/AMDGPU/v_mac_f16.ll
+++ b/test/CodeGen/AMDGPU/v_mac_f16.ll
@@ -1,4 +1,4 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
  ; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  
  ; GCN-LABEL: {{^}}mac_f16:
diff --git a/test/CodeGen/AMDGPU/vector-extract-insert.ll b/test/CodeGen/AMDGPU/vector-extract-insert.ll

index ab2bfcf..e3027d0 100644 (file)
--- a/test/CodeGen/AMDGPU/vector-extract-insert.ll
+++ b/test/CodeGen/AMDGPU/vector-extract-insert.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
  
  ; Test that when extracting the same unknown vector index from an
  ; insertelement the dynamic indexing is folded away.
@@ -81,4 +81,4 @@ define amdgpu_kernel void @extract_insert_same_dynelt_v4f32(float addrspace(1)*
  }
  
  attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
-\ No newline at end of file
+attributes #1 = { nounwind }
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Mon, 7 Aug 2017 14:58:04 +0000 (14:58 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Mon, 7 Aug 2017 14:58:04 +0000 (14:58 +0000)
lib/Target/AMDGPU/AMDGPU.td		patch \| blob \| history
lib/Target/AMDGPU/AMDGPUSubtarget.cpp		patch \| blob \| history
lib/Target/AMDGPU/AMDGPUSubtarget.h		patch \| blob \| history
lib/Target/AMDGPU/AMDGPUTargetMachine.cpp		patch \| blob \| history
lib/Target/AMDGPU/FLATInstructions.td		patch \| blob \| history
lib/Target/AMDGPU/Processors.td		patch \| blob \| history
lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp		patch \| blob \| history
test/CodeGen/AMDGPU/addrspacecast.ll		patch \| blob \| history
test/CodeGen/AMDGPU/array-ptr-calc-i32.ll		patch \| blob \| history
test/CodeGen/AMDGPU/bitcast-vector-extract.ll		patch \| blob \| history
test/CodeGen/AMDGPU/br_cc.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/branch-relaxation.ll		patch \| blob \| history
test/CodeGen/AMDGPU/callee-special-input-sgprs.ll		patch \| blob \| history
test/CodeGen/AMDGPU/callee-special-input-vgprs.ll		patch \| blob \| history
test/CodeGen/AMDGPU/captured-frame-index.ll		patch \| blob \| history
test/CodeGen/AMDGPU/collapse-endcf.ll		patch \| blob \| history
test/CodeGen/AMDGPU/ctpop.ll		patch \| blob \| history
test/CodeGen/AMDGPU/ctpop64.ll		patch \| blob \| history
test/CodeGen/AMDGPU/cvt_f32_ubyte.ll		patch \| blob \| history
test/CodeGen/AMDGPU/fabs.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/fadd.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/fcopysign.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/fence-amdgiz.ll		patch \| blob \| history
test/CodeGen/AMDGPU/fmax3.f64.ll		patch \| blob \| history
test/CodeGen/AMDGPU/fmul.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/fpext.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/fptosi.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/fptoui.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/frame-index-amdgiz.ll		patch \| blob \| history
test/CodeGen/AMDGPU/frame-index-elimination.ll		patch \| blob \| history
test/CodeGen/AMDGPU/fsub.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/half.ll		patch \| blob \| history
test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll		patch \| blob \| history
test/CodeGen/AMDGPU/indirect-addressing-si.ll		patch \| blob \| history
test/CodeGen/AMDGPU/insert_vector_elt.ll		patch \| blob \| history
test/CodeGen/AMDGPU/llvm.ceil.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/llvm.cos.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/llvm.exp2.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/llvm.floor.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/llvm.fma.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/llvm.log2.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/llvm.maxnum.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/llvm.memcpy.ll		patch \| blob \| history
test/CodeGen/AMDGPU/llvm.minnum.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/llvm.rint.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/llvm.round.ll		patch \| blob \| history
test/CodeGen/AMDGPU/llvm.sin.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/llvm.sqrt.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/llvm.trunc.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir		patch \| blob \| history
test/CodeGen/AMDGPU/merge-stores.ll		patch \| blob \| history
test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll		patch \| blob \| history
test/CodeGen/AMDGPU/promote-alloca-no-opts.ll		patch \| blob \| history
test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll		patch \| blob \| history
test/CodeGen/AMDGPU/s_addk_i32.ll		patch \| blob \| history
test/CodeGen/AMDGPU/s_mulk_i32.ll		patch \| blob \| history
test/CodeGen/AMDGPU/sad.ll		patch \| blob \| history
test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll		patch \| blob \| history
test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll		patch \| blob \| history
test/CodeGen/AMDGPU/select-vectors.ll		patch \| blob \| history
test/CodeGen/AMDGPU/select.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/setcc-fneg-constant.ll		patch \| blob \| history
test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll		patch \| blob \| history
test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll		patch \| blob \| history
test/CodeGen/AMDGPU/shl_add_constant.ll		patch \| blob \| history
test/CodeGen/AMDGPU/sitofp.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/trunc.ll		patch \| blob \| history
test/CodeGen/AMDGPU/udivrem.ll		patch \| blob \| history
test/CodeGen/AMDGPU/uitofp.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/v_mac_f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/vector-extract-insert.ll		patch \| blob \| history