From a174f0da62f1cad36d21c040bf37bfdd291b28cf Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 5 Jan 2020 12:22:21 -0500 Subject: [PATCH] AMDGPU/GlobalISel: Add pre-legalize combiner pass Just copy the AArch64 pass as-is for now, except for removing the memcpy handling. --- llvm/lib/Target/AMDGPU/AMDGPU.h | 4 + llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 15 +++ llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 1 + .../Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp | 149 +++++++++++++++++++++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 7 + llvm/lib/Target/AMDGPU/CMakeLists.txt | 3 + .../AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll | 114 ++++++---------- .../AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll | 80 +++++------ .../AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll | 4 +- .../AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll | 4 +- 10 files changed, 263 insertions(+), 118 deletions(-) create mode 100644 llvm/lib/Target/AMDGPU/AMDGPUCombine.td create mode 100644 llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index fbed51de0ea..621a93d4587 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -27,6 +27,10 @@ class TargetOptions; class PassRegistry; class Module; +// GlobalISel passes +void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &); +FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone); + // R600 Passes FunctionPass *createR600VectorRegMerger(); FunctionPass *createR600ExpandSpecialInstrsPass(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td new file mode 100644 index 00000000000..d8b3b89605e --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td @@ -0,0 +1,15 @@ +//=- AMDGPUCombine.td - Define AMDGPU Combine Rules ----------*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +include "llvm/Target/GlobalISel/Combine.td" + +def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper< + "AMDGPUGenPreLegalizerCombinerHelper", [all_combines, + elide_br_by_inverting_cond]> { + let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule"; +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index afb08a23f2c..1a8134788f8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// include "AMDGPU.td" +include "AMDGPUCombine.td" def sd_vsrc0 : ComplexPattern; def gi_vsrc0 : diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp new file mode 100644 index 00000000000..7298fb2317a --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp @@ -0,0 +1,149 @@ +//=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass does combining of machine instructions at the generic MI level, +// before the legalizer. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPUTargetMachine.h" +#include "llvm/CodeGen/GlobalISel/Combiner.h" +#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" +#include "llvm/CodeGen/GlobalISel/CombinerInfo.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "amdgpu-prelegalizer-combiner" + +using namespace llvm; +using namespace MIPatternMatch; + +#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS +#include "AMDGPUGenGICombiner.inc" +#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS + +namespace { +#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H +#include "AMDGPUGenGICombiner.inc" +#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H + +class AMDGPUPreLegalizerCombinerInfo : public CombinerInfo { + GISelKnownBits *KB; + MachineDominatorTree *MDT; + +public: + AMDGPUGenPreLegalizerCombinerHelper Generated; + + AMDGPUPreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize, + GISelKnownBits *KB, MachineDominatorTree *MDT) + : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, + /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize), + KB(KB), MDT(MDT) { + if (!Generated.parseCommandLineOption()) + report_fatal_error("Invalid rule identifier"); + } + + virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, + MachineIRBuilder &B) const override; +}; + +bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, + MachineInstr &MI, + MachineIRBuilder &B) const { + CombinerHelper Helper(Observer, B, KB, MDT); + + if (Generated.tryCombineAll(Observer, MI, B, Helper)) + return true; + + switch (MI.getOpcode()) { + case TargetOpcode::G_CONCAT_VECTORS: + return 
Helper.tryCombineConcatVectors(MI); + case TargetOpcode::G_SHUFFLE_VECTOR: + return Helper.tryCombineShuffleVector(MI); + } + + return false; +} + +#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP +#include "AMDGPUGenGICombiner.inc" +#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP + +// Pass boilerplate +// ================ + +class AMDGPUPreLegalizerCombiner : public MachineFunctionPass { +public: + static char ID; + + AMDGPUPreLegalizerCombiner(bool IsOptNone = false); + + StringRef getPassName() const override { return "AMDGPUPreLegalizerCombiner"; } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; +private: + bool IsOptNone; +}; +} // end anonymous namespace + +void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetPassConfig>(); + AU.setPreservesCFG(); + getSelectionDAGFallbackAnalysisUsage(AU); + AU.addRequired<GISelKnownBitsAnalysis>(); + AU.addPreserved<GISelKnownBitsAnalysis>(); + if (!IsOptNone) { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + } + MachineFunctionPass::getAnalysisUsage(AU); +} + +AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone) + : MachineFunctionPass(ID), IsOptNone(IsOptNone) { + initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry()); +} + +bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + auto *TPC = &getAnalysis<TargetPassConfig>(); + const Function &F = MF.getFunction(); + bool EnableOpt = + MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); + GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); + MachineDominatorTree *MDT = + IsOptNone ?
nullptr : &getAnalysis<MachineDominatorTree>(); + AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), + F.hasMinSize(), KB, MDT); + Combiner C(PCInfo, TPC); + return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); +} + +char AMDGPUPreLegalizerCombiner::ID = 0; +INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, + "Combine AMDGPU machine instrs before legalization", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) +INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, + "Combine AMDGPU machine instrs before legalization", false, + false) + +namespace llvm { +FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone) { + return new AMDGPUPreLegalizerCombiner(IsOptNone); +} +} // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index eb30d659bf0..0b0d1696732 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -217,6 +217,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeAMDGPULowerKernelAttributesPass(*PR); initializeAMDGPULowerIntrinsicsPass(*PR); initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(*PR); + initializeAMDGPUPreLegalizerCombinerPass(*PR); initializeAMDGPUPromoteAllocaPass(*PR); initializeAMDGPUCodeGenPreparePass(*PR); initializeAMDGPUPropagateAttributesEarlyPass(*PR); @@ -617,6 +618,7 @@ public: bool addILPOpts() override; bool addInstSelector() override; bool addIRTranslator() override; + void addPreLegalizeMachineIR() override; bool addLegalizeMachineIR() override; bool addRegBankSelect() override; bool addGlobalInstructionSelect() override; @@ -895,6 +897,11 @@ bool GCNPassConfig::addIRTranslator() { return false; } +void GCNPassConfig::addPreLegalizeMachineIR() { + bool IsOptNone = getOptLevel() == CodeGenOpt::None; + addPass(createAMDGPUPreLegalizeCombiner(IsOptNone)); +} + bool
GCNPassConfig::addLegalizeMachineIR() { addPass(new Legalizer()); return false; diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index 0b8eb4b25ae..3bbf2c34695 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -15,6 +15,8 @@ tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget) set(LLVM_TARGET_DEFINITIONS AMDGPUGISel.td) tablegen(LLVM AMDGPUGenGlobalISel.inc -gen-global-isel) +tablegen(LLVM AMDGPUGenGICombiner.inc -gen-global-isel-combiner + -combiners="AMDGPUPreLegalizerCombinerHelper") set(LLVM_TARGET_DEFINITIONS R600.td) tablegen(LLVM R600GenAsmWriter.inc -gen-asm-writer) @@ -58,6 +60,7 @@ add_llvm_target(AMDGPUCodeGen AMDGPUMacroFusion.cpp AMDGPUMCInstLower.cpp AMDGPUOpenCLEnqueuedBlockLowering.cpp + AMDGPUPreLegalizerCombiner.cpp AMDGPUPromoteAlloca.cpp AMDGPUPropagateAttributes.cpp AMDGPURegisterBankInfo.cpp diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll index d441571f48c..d0b43b93d43 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll @@ -46,7 +46,6 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 ad ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: lds_atomic_dec_ret_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -97,7 +96,6 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: lds_atomic_dec_ret_i32_offset: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -138,7 +136,6 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) noun ; VI-NEXT: v_mov_b32_e32 v1, s0 ; VI-NEXT: ds_dec_rtn_u32 v0, v1, 
v0 ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: lds_atomic_dec_noret_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 @@ -173,7 +170,6 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %pt ; VI-NEXT: v_mov_b32_e32 v1, s0 ; VI-NEXT: ds_dec_rtn_u32 v0, v1, v0 ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: lds_atomic_dec_noret_i32_offset: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 @@ -216,7 +212,6 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: global_atomic_dec_ret_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -267,7 +262,6 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %o ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: global_atomic_dec_ret_i32_offset: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -309,7 +303,6 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) n ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: global_atomic_dec_noret_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -347,7 +340,6 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(i32 addrspace(1)* ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: global_atomic_dec_noret_i32_offset: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -414,7 +406,6 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(i32 addrspace ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dword v[2:3], v0 ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: global_atomic_dec_ret_i32_offset_addr64: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, 
v0 @@ -488,7 +479,6 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(i32 addrspa ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: global_atomic_dec_noret_i32_offset_addr64: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -543,7 +533,6 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32(i32* %out, i32* %ptr) #0 { ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: flat_atomic_dec_ret_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -594,7 +583,6 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(i32* %out, i32* %ptr) ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: flat_atomic_dec_ret_i32_offset: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -636,7 +624,6 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32(i32* %ptr) nounwind { ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: flat_atomic_dec_noret_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -674,7 +661,6 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(i32* %ptr) nounwind ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: flat_atomic_dec_noret_i32_offset: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -741,7 +727,6 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(i32* %out, i32* ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dword v[2:3], v0 ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: flat_atomic_dec_ret_i32_offset_addr64: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -815,7 +800,6 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(i32* %ptr) #0 ; 
VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: flat_atomic_dec_noret_i32_offset_addr64: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -872,7 +856,6 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64(i64* %out, i64* %ptr) #0 { ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: flat_atomic_dec_ret_i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -926,7 +909,6 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(i64* %out, i64* %ptr) ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: flat_atomic_dec_ret_i64_offset: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -971,7 +953,6 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64(i64* %ptr) nounwind { ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: flat_atomic_dec_noret_i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -1012,7 +993,6 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(i64* %ptr) nounwind ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: flat_atomic_dec_noret_i64_offset: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -1082,7 +1062,6 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(i64* %out, i64* ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: flat_atomic_dec_ret_i64_offset_addr64: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -1159,7 +1138,6 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64* %ptr) #0 ; VI-NEXT: v_mov_b32_e32 v3, 0 ; VI-NEXT: flat_atomic_dec_x2 v[0:1], 
v[0:1], v[2:3] glc ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: flat_atomic_dec_noret_i64_offset_addr64: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -1192,42 +1170,41 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64* %ptr) #0 define amdgpu_kernel void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 { ; CI-LABEL: atomic_dec_shl_base_lds_0: ; CI: ; %bb.0: -; CI-NEXT: v_mul_lo_u32 v1, 4, v0 +; CI-NEXT: v_mul_lo_u32 v5, 4, v0 ; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; CI-NEXT: v_add_i32_e32 v2, vcc, 2, v0 +; CI-NEXT: v_mov_b32_e32 v6, 9 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: v_add_i32_e32 v0, vcc, 0, v1 -; CI-NEXT: v_add_i32_e32 v0, vcc, 8, v0 -; CI-NEXT: v_mov_b32_e32 v1, 9 -; CI-NEXT: ds_dec_rtn_u32 v3, v0, v1 +; CI-NEXT: v_add_i32_e32 v5, vcc, 0, v5 +; CI-NEXT: v_add_i32_e32 v5, vcc, 8, v5 +; CI-NEXT: ds_dec_rtn_u32 v5, v5, v6 +; CI-NEXT: v_add_i32_e32 v4, vcc, 2, v0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s2 +; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: flat_store_dword v[0:1], v2 -; CI-NEXT: v_mov_b32_e32 v0, s0 -; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: flat_store_dword v[0:1], v3 +; CI-NEXT: v_mov_b32_e32 v2, s0 +; CI-NEXT: flat_store_dword v[0:1], v4 +; CI-NEXT: flat_store_dword v[2:3], v5 ; CI-NEXT: s_endpgm ; ; VI-LABEL: atomic_dec_shl_base_lds_0: ; VI: ; %bb.0: -; VI-NEXT: v_mul_lo_u32 v1, 4, v0 +; VI-NEXT: v_mul_lo_u32 v5, 4, v0 ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-NEXT: v_add_u32_e32 v2, vcc, 2, v0 +; VI-NEXT: v_mov_b32_e32 v6, 9 ; VI-NEXT: s_mov_b32 m0, -1 -; VI-NEXT: v_add_u32_e32 v0, vcc, 0, v1 -; VI-NEXT: v_add_u32_e32 v0, vcc, 8, v0 -; VI-NEXT: v_mov_b32_e32 v1, 9 -; VI-NEXT: ds_dec_rtn_u32 v3, v0, v1 +; VI-NEXT: v_add_u32_e32 v5, vcc, 0, v5 +; VI-NEXT: v_add_u32_e32 v5, vcc, 8, v5 +; VI-NEXT: ds_dec_rtn_u32 v5, v5, v6 +; VI-NEXT: v_add_u32_e32 v4, vcc, 2, v0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: 
v_mov_b32_e32 v0, s2 +; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: flat_store_dword v[0:1], v2 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: flat_store_dword v[0:1], v3 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: flat_store_dword v[0:1], v4 +; VI-NEXT: flat_store_dword v[2:3], v5 ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: atomic_dec_shl_base_lds_0: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mul_lo_u32 v1, 4, v0 @@ -1286,7 +1263,6 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 ad ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: lds_atomic_dec_ret_i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -1340,7 +1316,6 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: lds_atomic_dec_ret_i64_offset: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -1384,7 +1359,6 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) noun ; VI-NEXT: v_mov_b32_e32 v2, s0 ; VI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: lds_atomic_dec_noret_i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 @@ -1422,7 +1396,6 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %pt ; VI-NEXT: v_mov_b32_e32 v2, s0 ; VI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: lds_atomic_dec_noret_i64_offset: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 @@ -1468,7 +1441,6 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: global_atomic_dec_ret_i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 
s[0:3], s[0:1], 0x24 @@ -1522,7 +1494,6 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %o ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: global_atomic_dec_ret_i64_offset: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -1567,7 +1538,6 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) n ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: global_atomic_dec_noret_i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -1608,7 +1578,6 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)* ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: global_atomic_dec_noret_i64_offset: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -1678,7 +1647,6 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: global_atomic_dec_ret_i64_offset_addr64: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -1755,7 +1723,6 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspa ; VI-NEXT: v_mov_b32_e32 v3, 0 ; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc ; VI-NEXT: s_endpgm -; ; GFX9-LABEL: global_atomic_dec_noret_i64_offset_addr64: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -1788,44 +1755,43 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspa define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 { ; CI-LABEL: atomic_dec_shl_base_lds_0_i64: ; CI: ; %bb.0: -; CI-NEXT: v_mul_lo_u32 v1, 8, v0 +; CI-NEXT: v_mul_lo_u32 v7, 
8, v0 ; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; CI-NEXT: v_add_i32_e32 v4, vcc, 2, v0 -; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: v_add_i32_e32 v0, vcc, 0, v1 -; CI-NEXT: v_add_i32_e32 v2, vcc, 16, v0 +; CI-NEXT: v_add_i32_e32 v6, vcc, 2, v0 ; CI-NEXT: v_mov_b32_e32 v0, 9 +; CI-NEXT: v_add_i32_e32 v7, vcc, 0, v7 ; CI-NEXT: v_mov_b32_e32 v1, 0 -; CI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] +; CI-NEXT: v_add_i32_e32 v7, vcc, 16, v7 +; CI-NEXT: s_mov_b32 m0, -1 +; CI-NEXT: ds_dec_rtn_u64 v[0:1], v7, v[0:1] ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v2, s2 +; CI-NEXT: v_mov_b32_e32 v5, s1 ; CI-NEXT: v_mov_b32_e32 v3, s3 -; CI-NEXT: flat_store_dword v[2:3], v4 -; CI-NEXT: v_mov_b32_e32 v3, s1 -; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; CI-NEXT: v_mov_b32_e32 v4, s0 +; CI-NEXT: flat_store_dword v[2:3], v6 +; CI-NEXT: flat_store_dwordx2 v[4:5], v[0:1] ; CI-NEXT: s_endpgm ; ; VI-LABEL: atomic_dec_shl_base_lds_0_i64: ; VI: ; %bb.0: -; VI-NEXT: v_mul_lo_u32 v1, 8, v0 +; VI-NEXT: v_mul_lo_u32 v7, 8, v0 ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-NEXT: v_add_u32_e32 v4, vcc, 2, v0 -; VI-NEXT: s_mov_b32 m0, -1 -; VI-NEXT: v_add_u32_e32 v0, vcc, 0, v1 -; VI-NEXT: v_add_u32_e32 v2, vcc, 16, v0 +; VI-NEXT: v_add_u32_e32 v6, vcc, 2, v0 ; VI-NEXT: v_mov_b32_e32 v0, 9 +; VI-NEXT: v_add_u32_e32 v7, vcc, 0, v7 ; VI-NEXT: v_mov_b32_e32 v1, 0 -; VI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] +; VI-NEXT: v_add_u32_e32 v7, vcc, 16, v7 +; VI-NEXT: s_mov_b32 m0, -1 +; VI-NEXT: ds_dec_rtn_u64 v[0:1], v7, v[0:1] ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v2, s2 +; VI-NEXT: v_mov_b32_e32 v5, s1 ; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dword v[2:3], v4 -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; VI-NEXT: v_mov_b32_e32 v4, s0 +; VI-NEXT: flat_store_dword v[2:3], v6 +; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1] ; VI-NEXT: s_endpgm -; ; 
GFX9-LABEL: atomic_dec_shl_base_lds_0_i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mul_lo_u32 v3, 8, v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll index 176c7f19502..64dc688cae4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll @@ -523,40 +523,40 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(i32 addrspa define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 { ; CI-LABEL: atomic_inc_shl_base_lds_0_i32: ; CI: ; %bb.0: -; CI-NEXT: v_mul_lo_u32 v1, 4, v0 +; CI-NEXT: v_mul_lo_u32 v5, 4, v0 ; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; CI-NEXT: v_add_i32_e32 v2, vcc, 2, v0 +; CI-NEXT: v_mov_b32_e32 v6, 9 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: v_add_i32_e32 v0, vcc, 0, v1 -; CI-NEXT: v_add_i32_e32 v0, vcc, 8, v0 -; CI-NEXT: v_mov_b32_e32 v1, 9 -; CI-NEXT: ds_inc_rtn_u32 v3, v0, v1 +; CI-NEXT: v_add_i32_e32 v5, vcc, 0, v5 +; CI-NEXT: v_add_i32_e32 v5, vcc, 8, v5 +; CI-NEXT: ds_inc_rtn_u32 v5, v5, v6 +; CI-NEXT: v_add_i32_e32 v4, vcc, 2, v0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s2 +; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: v_mov_b32_e32 v1, s3 -; CI-NEXT: flat_store_dword v[0:1], v2 -; CI-NEXT: v_mov_b32_e32 v0, s0 -; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: flat_store_dword v[0:1], v3 +; CI-NEXT: v_mov_b32_e32 v2, s0 +; CI-NEXT: flat_store_dword v[0:1], v4 +; CI-NEXT: flat_store_dword v[2:3], v5 ; CI-NEXT: s_endpgm ; ; VI-LABEL: atomic_inc_shl_base_lds_0_i32: ; VI: ; %bb.0: -; VI-NEXT: v_mul_lo_u32 v1, 4, v0 +; VI-NEXT: v_mul_lo_u32 v5, 4, v0 ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-NEXT: v_add_u32_e32 v2, vcc, 2, v0 +; VI-NEXT: v_mov_b32_e32 v6, 9 ; VI-NEXT: s_mov_b32 m0, -1 -; VI-NEXT: v_add_u32_e32 v0, vcc, 0, v1 -; VI-NEXT: v_add_u32_e32 v0, vcc, 8, v0 -; VI-NEXT: 
v_mov_b32_e32 v1, 9 -; VI-NEXT: ds_inc_rtn_u32 v3, v0, v1 +; VI-NEXT: v_add_u32_e32 v5, vcc, 0, v5 +; VI-NEXT: v_add_u32_e32 v5, vcc, 8, v5 +; VI-NEXT: ds_inc_rtn_u32 v5, v5, v6 +; VI-NEXT: v_add_u32_e32 v4, vcc, 2, v0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s2 +; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v1, s3 -; VI-NEXT: flat_store_dword v[0:1], v2 -; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: flat_store_dword v[0:1], v3 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: flat_store_dword v[0:1], v4 +; VI-NEXT: flat_store_dword v[2:3], v5 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: atomic_inc_shl_base_lds_0_i32: @@ -1446,42 +1446,42 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0 define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 { ; CI-LABEL: atomic_inc_shl_base_lds_0_i64: ; CI: ; %bb.0: -; CI-NEXT: v_mul_lo_u32 v1, 8, v0 +; CI-NEXT: v_mul_lo_u32 v7, 8, v0 ; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; CI-NEXT: v_add_i32_e32 v4, vcc, 2, v0 -; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: v_add_i32_e32 v0, vcc, 0, v1 -; CI-NEXT: v_add_i32_e32 v2, vcc, 16, v0 +; CI-NEXT: v_add_i32_e32 v6, vcc, 2, v0 ; CI-NEXT: v_mov_b32_e32 v0, 9 +; CI-NEXT: v_add_i32_e32 v7, vcc, 0, v7 ; CI-NEXT: v_mov_b32_e32 v1, 0 -; CI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1] +; CI-NEXT: v_add_i32_e32 v7, vcc, 16, v7 +; CI-NEXT: s_mov_b32 m0, -1 +; CI-NEXT: ds_inc_rtn_u64 v[0:1], v7, v[0:1] ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v2, s2 +; CI-NEXT: v_mov_b32_e32 v5, s1 ; CI-NEXT: v_mov_b32_e32 v3, s3 -; CI-NEXT: flat_store_dword v[2:3], v4 -; CI-NEXT: v_mov_b32_e32 v3, s1 -; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; CI-NEXT: v_mov_b32_e32 v4, s0 +; CI-NEXT: flat_store_dword v[2:3], v6 +; CI-NEXT: flat_store_dwordx2 v[4:5], v[0:1] ; CI-NEXT: s_endpgm ; ; VI-LABEL: atomic_inc_shl_base_lds_0_i64: ; VI: ; 
%bb.0: -; VI-NEXT: v_mul_lo_u32 v1, 8, v0 +; VI-NEXT: v_mul_lo_u32 v7, 8, v0 ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-NEXT: v_add_u32_e32 v4, vcc, 2, v0 -; VI-NEXT: s_mov_b32 m0, -1 -; VI-NEXT: v_add_u32_e32 v0, vcc, 0, v1 -; VI-NEXT: v_add_u32_e32 v2, vcc, 16, v0 +; VI-NEXT: v_add_u32_e32 v6, vcc, 2, v0 ; VI-NEXT: v_mov_b32_e32 v0, 9 +; VI-NEXT: v_add_u32_e32 v7, vcc, 0, v7 ; VI-NEXT: v_mov_b32_e32 v1, 0 -; VI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1] +; VI-NEXT: v_add_u32_e32 v7, vcc, 16, v7 +; VI-NEXT: s_mov_b32 m0, -1 +; VI-NEXT: ds_inc_rtn_u64 v[0:1], v7, v[0:1] ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v2, s2 +; VI-NEXT: v_mov_b32_e32 v5, s1 ; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dword v[2:3], v4 -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; VI-NEXT: v_mov_b32_e32 v4, s0 +; VI-NEXT: flat_store_dword v[2:3], v6 +; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1] ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: atomic_inc_shl_base_lds_0_i64: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll index 29c9729d2e3..0164678bbf8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll @@ -8,11 +8,11 @@ define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) { ; GCN-NEXT: s_load_dword s0, s[4:5], 0x24 ; GCN-NEXT: ; implicit-def: $vcc_hi ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_cmp_eq_u32 s1, 0 +; GCN-NEXT: s_cmp_lg_u32 s1, 0 ; GCN-NEXT: s_cselect_b32 s1, 1, 0 ; GCN-NEXT: s_and_b32 s1, s1, 1 ; GCN-NEXT: s_cmp_lg_u32 s1, 0 -; GCN-NEXT: s_cbranch_scc0 BB0_2 +; GCN-NEXT: s_cbranch_scc1 BB0_2 ; GCN-NEXT: ; %bb.1: ; %mid ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: global_store_dword v[0:1], v0, off diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll index c4ae325ba8e..b02a296e2bd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll @@ -7,11 +7,11 @@ define amdgpu_kernel void @test_wave64(i32 %arg0, i64 %saved) { ; GCN-NEXT: s_load_dword s2, s[4:5], 0x0 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_cmp_eq_u32 s2, 0 +; GCN-NEXT: s_cmp_lg_u32 s2, 0 ; GCN-NEXT: s_cselect_b32 s2, 1, 0 ; GCN-NEXT: s_and_b32 s2, s2, 1 ; GCN-NEXT: s_cmp_lg_u32 s2, 0 -; GCN-NEXT: s_cbranch_scc0 BB0_2 +; GCN-NEXT: s_cbranch_scc1 BB0_2 ; GCN-NEXT: ; %bb.1: ; %mid ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: global_store_dword v[0:1], v0, off -- 2.11.0