From aa44beb5f0155d13a24edc0ae94fc8ad1196132b Mon Sep 17 00:00:00 2001 From: Jane Jian Date: Mon, 22 Aug 2022 15:20:24 +0800 Subject: [PATCH] drm/amdgpu/vcn: Add sriov VCN v4_0 unified queue support MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Enable unified queue support for sriov, abandon all previous multi-queue settings Acked-by: Christian König Signed-off-by: Jane Jian Reviewed-by: Ruijing Dong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 13 ++ drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 297 ++++++++++++++++++++++++-- 3 files changed, 300 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 06b2d1830091..9fa2a5ceb77d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1908,10 +1908,9 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(4, 0, 0): case IP_VERSION(4, 0, 2): case IP_VERSION(4, 0, 4): - if (!amdgpu_sriov_vf(adev)) { - amdgpu_device_ip_block_add(adev, &vcn_v4_0_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v4_0_ip_block); + if (!amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &jpeg_v4_0_ip_block); - } break; default: dev_err(adev->dev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 60c608144480..80b7a6cfd026 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -161,6 +161,7 @@ #define AMDGPU_VCN_SW_RING_FLAG (1 << 9) #define AMDGPU_VCN_FW_LOGGING_FLAG (1 << 10) #define AMDGPU_VCN_SMU_VERSION_INFO_FLAG (1 << 11) +#define AMDGPU_VCN_VF_RB_SETUP_FLAG (1 << 12) #define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x00000001 #define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x00000001 @@ -317,12 +318,24 @@ struct amdgpu_fw_shared { struct amdgpu_fw_shared_smu_interface_info smu_interface_info; }; +struct amdgpu_fw_shared_rb_setup { + uint32_t is_rb_enabled_flags; + uint32_t rb_addr_lo; + uint32_t rb_addr_hi; + uint32_t rb_size; + uint32_t rb4_addr_lo; + uint32_t rb4_addr_hi; + uint32_t rb4_size; + uint32_t reserved[6]; +}; + struct amdgpu_vcn4_fw_shared { uint32_t present_flag_0; uint8_t pad[12]; struct amdgpu_fw_shared_unified_queue_struct sq; uint8_t pad1[8]; struct amdgpu_fw_shared_fw_logging fw_log; + struct amdgpu_fw_shared_rb_setup rb_setup; }; struct amdgpu_vcn_fwlog { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index fb2d74f30448..09c89faa8c27 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -30,6 +30,7 @@ #include "soc15d.h" #include "soc15_hw_ip.h" #include "vcn_v2_0.h" +#include "mmsch_v4_0.h" #include "vcn/vcn_4_0_0_offset.h" #include "vcn/vcn_4_0_0_sh_mask.h" @@ -45,6 +46,8 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define VCN_HARVEST_MMSCH 0 + #define RDECODE_MSG_CREATE 0x00000000 #define RDECODE_MESSAGE_CREATE 0x00000001 @@ -53,12 +56,14 @@ static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN1 }; +static int vcn_v4_0_start_sriov(struct amdgpu_device *adev); static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state); static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); +static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring); /** * vcn_v4_0_early_init - set function pointers @@ -71,6 +76,9 @@ static int vcn_v4_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_sriov_vf(adev)) + adev->vcn.harvest_config = VCN_HARVEST_MMSCH; + /* re-use enc ring as unified ring */ adev->vcn.num_enc_rings = 1; @@ -92,6 +100,7 @@ static int vcn_v4_0_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; + int vcn_doorbell_index = 0; r = amdgpu_vcn_sw_init(adev); if (r) @@ -103,6 +112,12 @@ static int vcn_v4_0_sw_init(void *handle) if (r) return r; + if (amdgpu_sriov_vf(adev)) { + vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 - MMSCH_DOORBELL_OFFSET; + /* get DWORD offset */ + vcn_doorbell_index = vcn_doorbell_index << 1; + } + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { volatile struct amdgpu_vcn4_fw_shared *fw_shared; @@ -119,7 +134,10 @@ static int vcn_v4_0_sw_init(void *handle) ring = &adev->vcn.inst[i].ring_enc[0]; ring->use_doorbell = true; - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; + if (amdgpu_sriov_vf(adev)) + ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1; + else + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; sprintf(ring->name, "vcn_unified_%d", i); @@ -132,10 +150,19 @@ static int vcn_v4_0_sw_init(void *handle) fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); fw_shared->sq.is_enabled = 1; + if (amdgpu_sriov_vf(adev)) + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); + if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); } + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_virt_alloc_mm_table(adev); + if (r) + return r; + } + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode; @@ -169,6 +196,9 @@ static int vcn_v4_0_sw_fini(void *handle) drm_dev_exit(idx); } + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_free_mm_table(adev); + r = amdgpu_vcn_suspend(adev); if (r) return r; @@ -191,18 +221,42 @@ static int vcn_v4_0_hw_init(void *handle) struct amdgpu_ring *ring; int i, r; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; + if (amdgpu_sriov_vf(adev)) { + r = vcn_v4_0_start_sriov(adev); + if (r) + goto done; - ring = &adev->vcn.inst[i].ring_enc[0]; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, - ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i); + ring = &adev->vcn.inst[i].ring_enc[0]; + if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) { + ring->sched.ready = false; + ring->no_scheduler = true; + dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name); + } else { + ring->wptr = 0; + ring->wptr_old = 0; + vcn_v4_0_unified_ring_set_wptr(ring); + ring->sched.ready = true; + } + } + } else { + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; - r = amdgpu_ring_test_helper(ring); - if (r) - goto done; + ring = &adev->vcn.inst[i].ring_enc[0]; + + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i); + + r = amdgpu_ring_test_helper(ring); + if (r) + goto done; + + } } done: @@ -230,12 +284,14 @@ static int vcn_v4_0_hw_fini(void *handle) for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; - - if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || + if (!amdgpu_sriov_vf(adev)) { + if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || (adev->vcn.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(VCN, i, regUVD_STATUS))) { vcn_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + } } + } return 0; @@ -1107,6 +1163,214 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) return 0; } +static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) +{ + int i; + struct amdgpu_ring *ring_enc; + uint64_t cache_addr; + uint64_t rb_enc_addr; + uint64_t ctx_addr; + uint32_t param, resp, expected; + uint32_t offset, cache_size; + uint32_t tmp, timeout; + + struct amdgpu_mm_table *table = &adev->virt.mm_table; + uint32_t *table_loc; + uint32_t table_size; + uint32_t size, size_dw; + uint32_t init_status; + uint32_t enabled_vcn; + + struct mmsch_v4_0_cmd_direct_write + direct_wt = { {0} }; + struct mmsch_v4_0_cmd_direct_read_modify_write + direct_rd_mod_wt = { {0} }; + struct mmsch_v4_0_cmd_end end = { {0} }; + struct mmsch_v4_0_init_header header; + + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + volatile struct amdgpu_fw_shared_rb_setup *rb_setup; + + direct_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_WRITE; + direct_rd_mod_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; + end.cmd_header.command_type = + MMSCH_COMMAND__END; + + header.version = MMSCH_VERSION; + header.total_size = sizeof(struct mmsch_v4_0_init_header) >> 2; + for (i = 0; i < AMDGPU_MAX_VCN_INSTANCES; i++) { + header.inst[i].init_status = 0; + header.inst[i].table_offset = 0; + header.inst[i].table_size = 0; + } + + table_loc = (uint32_t *)table->cpu_addr; + table_loc += header.total_size; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + table_size = 0; + + MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_STATUS), + ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); + + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi); + offset = 0; + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_VCPU_CACHE_OFFSET0), + 0); + } else { + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[i].gpu_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[i].gpu_addr)); + offset = cache_size; + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_VCPU_CACHE_SIZE0), + cache_size); + + cache_addr = adev->vcn.inst[i].gpu_addr + offset; + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(cache_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(cache_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_VCPU_CACHE_OFFSET1), + 0); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_VCPU_CACHE_SIZE1), + AMDGPU_VCN_STACK_SIZE); + + cache_addr = adev->vcn.inst[i].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE; + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(cache_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(cache_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_VCPU_CACHE_OFFSET2), + 0); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_VCPU_CACHE_SIZE2), + AMDGPU_VCN_CONTEXT_SIZE); + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + rb_setup = &fw_shared->rb_setup; + + ring_enc = &adev->vcn.inst[i].ring_enc[0]; + ring_enc->wptr = 0; + rb_enc_addr = ring_enc->gpu_addr; + + rb_setup->is_rb_enabled_flags |= RB_ENABLED; + rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr); + rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr); + rb_setup->rb_size = ring_enc->ring_size / 4; + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); + + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + regUVD_VCPU_NONCACHE_SIZE0), + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared))); + + /* add end packet */ + MMSCH_V4_0_INSERT_END(); + + /* refine header */ + header.inst[i].init_status = 0; + header.inst[i].table_offset = header.total_size; + header.inst[i].table_size = table_size; + header.total_size += table_size; + } + + /* Update init table header in memory */ + size = sizeof(struct mmsch_v4_0_init_header); + table_loc = (uint32_t *)table->cpu_addr; + memcpy((void *)table_loc, &header, size); + + /* message MMSCH (in VCN[0]) to initialize this client + * 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr + * of memory descriptor location + */ + ctx_addr = table->gpu_addr; + WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr)); + WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr)); + + /* 2, update vmid of descriptor */ + tmp = RREG32_SOC15(VCN, 0, regMMSCH_VF_VMID); + tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK; + /* use domain0 for MM scheduler */ + tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); + WREG32_SOC15(VCN, 0, regMMSCH_VF_VMID, tmp); + + /* 3, notify mmsch about the size of this descriptor */ + size = header.total_size; + WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_SIZE, size); + + /* 4, set resp to zero */ + WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP, 0); + + /* 5, kick off the initialization and wait until + * MMSCH_VF_MAILBOX_RESP becomes non-zero + */ + param = 0x00000001; + WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_HOST, param); + tmp = 0; + timeout = 1000; + resp = 0; + expected = MMSCH_VF_MAILBOX_RESP__OK; + while (resp != expected) { + resp = RREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP); + if (resp != 0) + break; + + udelay(10); + tmp = tmp + 10; + if (tmp >= timeout) { + DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\ + " waiting for regMMSCH_VF_MAILBOX_RESP "\ + "(expected=0x%08x, readback=0x%08x)\n", + tmp, expected, resp); + return -EBUSY; + } + } + enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0; + init_status = ((struct mmsch_v4_0_init_header *)(table_loc))->inst[enabled_vcn].init_status; + if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE + && init_status != MMSCH_VF_ENGINE_STATUS__PASS) + DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\ + "status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status); + + return 0; +} + /** * vcn_v4_0_stop_dpg_mode - VCN stop with dpg mode * @@ -1596,6 +1860,15 @@ static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_sta struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; + /* for SRIOV, guest should not control VCN Power-gating + * MMSCH FW should control Power-gating and clock-gating + * guest should avoid touching CGC and PG + */ + if (amdgpu_sriov_vf(adev)) { + adev->vcn.cur_state = AMD_PG_STATE_UNGATE; + return 0; + } + if(state == adev->vcn.cur_state) return 0; -- 2.11.0