From 0a52a6cacca6030374fca7087e8556b382cca0e3 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 27 Jan 2021 11:35:54 +0100 Subject: [PATCH] drm/amdgpu: add wave limit functionality for gfx8,9 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Wave limiting can be used to load balance high priority compute jobs along with gfx jobs. When enabled, this will reserve ~75% of waves for compute jobs. We do not need this from gfx10 onwards because >=gfx10 has asynchronous compute tunneling to replace the wave limit requirement. Signed-off-by: Nirmoy Das Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 18 +++++++++++++++++- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 21 ++++++++++++++++++++- 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 2ada80ce42f5..56acec1075ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -197,6 +197,7 @@ struct amdgpu_ring_funcs { void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid); int (*preempt_ib)(struct amdgpu_ring *ring); void (*emit_mem_sync)(struct amdgpu_ring *ring); + void (*emit_wave_limit)(struct amdgpu_ring *ring, bool enable); }; struct amdgpu_ring { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index b0284c4659ba..bdfd29a22b3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -29,6 +29,7 @@ #include "amdgpu.h" #include "amdgpu_gfx.h" +#include "amdgpu_ring.h" #include "vi.h" #include "vi_structs.h" #include "vid.h" @@ -6845,6 +6846,19 @@ static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ } +#define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT 0x07ffffff 
+static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) +{ + uint32_t val; + + /* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit + * number of gfx waves. Setting 5 bit will make sure gfx only gets + * around 25% of gpu resources. + */ + val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT; + amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val); +} + static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { .name = "gfx_v8_0", .early_init = gfx_v8_0_early_init, @@ -6928,7 +6942,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */ 7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ - 7, /* gfx_v8_0_emit_mem_sync_compute */ + 7 + /* gfx_v8_0_emit_mem_sync_compute */ + 5, /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */ .emit_ib = gfx_v8_0_ring_emit_ib_compute, .emit_fence = gfx_v8_0_ring_emit_fence_compute, @@ -6942,6 +6957,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v8_0_ring_emit_wreg, .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute, + .emit_wave_limit = gfx_v8_0_emit_wave_limit, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 7b13f9dd965a..027997e95e46 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -52,6 +52,7 @@ #include "asic_reg/pwr/pwr_10_0_offset.h" #include "asic_reg/pwr/pwr_10_0_sh_mask.h" +#include "asic_reg/gc/gc_9_0_default.h" #define GFX9_NUM_GFX_RINGS 1 #define GFX9_MEC_HPD_SIZE 4096 @@ -6667,6 +6668,22 @@ static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0x0000000A); /* 
POLL_INTERVAL */ } +static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t val; + + + /* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit + * number of gfx waves. Setting 5 bit will make sure gfx only gets + * around 25% of gpu resources. + */ + val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT; + amdgpu_ring_emit_wreg(ring, + SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX), + val); +} + static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { .name = "gfx_v9_0", .early_init = gfx_v9_0_early_init, @@ -6756,7 +6773,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 2 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ - 7, /* gfx_v9_0_emit_mem_sync */ + 7 + /* gfx_v9_0_emit_mem_sync */ + 5, /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ .emit_ib = gfx_v9_0_ring_emit_ib_compute, .emit_fence = gfx_v9_0_ring_emit_fence, @@ -6772,6 +6790,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, .emit_mem_sync = gfx_v9_0_emit_mem_sync, + .emit_wave_limit = gfx_v9_0_emit_wave_limit, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { -- 2.11.0