From 7a3f751cbc0d9be18d92ec1b6917ced4f0cd84b8 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Mon, 11 Jun 2018 16:50:49 +0000 Subject: [PATCH] [AMDGPU] Do not consider indirect access through phi for wave limiter Rationale: an indirect access is usually an issue because the load is not ready by the time of its use. However, if the use is inside a loop and the load is outside, that is potentially an issue for the first iteration only. Differential Revision: https://reviews.llvm.org/D47740 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@334420 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp | 6 ------ test/CodeGen/AMDGPU/perfhint.ll | 26 ++++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp index d307f818d7b..3cfdccc9fe5 100644 --- a/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp +++ b/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp @@ -198,12 +198,6 @@ bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const { continue; } - if (auto Phi = dyn_cast<PHINode>(V)) { - for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) - WorkSet.insert(Phi->getIncomingValue(I)); - continue; - } - LLVM_DEBUG(dbgs() << " dropped\n"); } diff --git a/test/CodeGen/AMDGPU/perfhint.ll b/test/CodeGen/AMDGPU/perfhint.ll index 06dc7ad4898..a8990be7f19 100644 --- a/test/CodeGen/AMDGPU/perfhint.ll +++ b/test/CodeGen/AMDGPU/perfhint.ll @@ -82,4 +82,30 @@ bb: ret void } +; GCN-LABEL: {{^}}test_indirect_through_phi: +; MemoryBound: 0 +; WaveLimiterHint : 0 +define amdgpu_kernel void @test_indirect_through_phi(float addrspace(1)* %arg) { +bb: + %load = load float, float addrspace(1)* %arg, align 8 + %load.f = bitcast float %load to i32 + %n = tail call i32 @llvm.amdgcn.workitem.id.x() + br label %bb1 + +bb1: ; preds = %bb1, %bb + %phi = phi i32 [ %load.f, %bb ], [ %and2, %bb1 ] + %ind = phi i32 [ 0, %bb ], [ %inc2, %bb1 ] 
+ %and1 = and i32 %phi, %n + %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %and1 + store float %load, float addrspace(1)* %gep, align 4 + %inc1 = add nsw i32 %phi, 1310720 + %and2 = and i32 %inc1, %n + %inc2 = add nuw nsw i32 %ind, 1 + %cmp = icmp eq i32 %inc2, 1024 + br i1 %cmp, label %bb2, label %bb1 + +bb2: ; preds = %bb1 + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() -- 2.11.0