OSDN Git Service

drm/amdgpu/jpeg: enable jpeg v4_0 for sriov
authorJane Jian <Jane.Jian@amd.com>
Fri, 8 Jul 2022 10:07:38 +0000 (18:07 +0800)
committerAlex Deucher <alexander.deucher@amd.com>
Wed, 22 Mar 2023 04:47:59 +0000 (00:47 -0400)
- skip direct jpeg registers read&write since it is not allowed
- reset Doorbell range layout for sriov

Signed-off-by: Jane Jian <Jane.Jian@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h

index 77a8b05..6edaa43 100644 (file)
@@ -1948,9 +1948,8 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
                case IP_VERSION(4, 0, 2):
                case IP_VERSION(4, 0, 4):
                        amdgpu_device_ip_block_add(adev, &vcn_v4_0_ip_block);
-                       if (!amdgpu_sriov_vf(adev))
-                               amdgpu_device_ip_block_add(adev, &jpeg_v4_0_ip_block);
-                       break;
+                       amdgpu_device_ip_block_add(adev, &jpeg_v4_0_ip_block);
+                       return 0;
                default:
                        dev_err(adev->dev,
                                "Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n",
index 479d9bc..b07c000 100644 (file)
@@ -118,6 +118,10 @@ int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring)
        unsigned i;
        int r;
 
+       /* JPEG in SRIOV does not support direct register read/write */
+       if (amdgpu_sriov_vf(adev))
+               return 0;
+
        WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD);
        r = amdgpu_ring_alloc(ring, 3);
        if (r)
@@ -202,17 +206,18 @@ int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
        } else {
                r = 0;
        }
-
-       for (i = 0; i < adev->usec_timeout; i++) {
-               tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch);
-               if (tmp == 0xDEADBEEF)
-                       break;
-               udelay(1);
+       if (!amdgpu_sriov_vf(adev)) {
+               for (i = 0; i < adev->usec_timeout; i++) {
+                       tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch);
+                       if (tmp == 0xDEADBEEF)
+                               break;
+                       udelay(1);
+               }
+
+               if (i >= adev->usec_timeout)
+                       r = -ETIMEDOUT;
        }
 
-       if (i >= adev->usec_timeout)
-               r = -ETIMEDOUT;
-
        dma_fence_put(fence);
 error:
        return r;
index 3129094..5f2a034 100644 (file)
@@ -28,6 +28,7 @@
 #include "soc15d.h"
 #include "jpeg_v2_0.h"
 #include "jpeg_v4_0.h"
+#include "mmsch_v4_0.h"
 
 #include "vcn/vcn_4_0_0_offset.h"
 #include "vcn/vcn_4_0_0_sh_mask.h"
 
 #define regUVD_JPEG_PITCH_INTERNAL_OFFSET                  0x401f
 
+static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev);
 static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev);
 static void jpeg_v4_0_set_irq_funcs(struct amdgpu_device *adev);
 static int jpeg_v4_0_set_powergating_state(void *handle,
                                enum amd_powergating_state state);
 static void jpeg_v4_0_set_ras_funcs(struct amdgpu_device *adev);
 
+static void jpeg_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring);
+
 /**
  * jpeg_v4_0_early_init - set function pointers
  *
@@ -103,7 +107,8 @@ static int jpeg_v4_0_sw_init(void *handle)
 
        ring = &adev->jpeg.inst->ring_dec;
        ring->use_doorbell = true;
-       ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
+       ring->doorbell_index = amdgpu_sriov_vf(adev) ? (((adev->doorbell_index.vcn.vcn_ring0_1) << 1) + 4) : ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1);
+
        sprintf(ring->name, "jpeg_dec");
        r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
                             AMDGPU_RING_PRIO_DEFAULT, NULL);
@@ -153,16 +158,26 @@ static int jpeg_v4_0_hw_init(void *handle)
        struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec;
        int r;
 
-       adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
-                                       (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
+       if (amdgpu_sriov_vf(adev)) {
+               r = jpeg_v4_0_start_sriov(adev);
+               if (r)
+                       return r;
+               ring->wptr = 0;
+               ring->wptr_old = 0;
+               jpeg_v4_0_dec_ring_set_wptr(ring);
+               ring->sched.ready = true;
+       } else {
+               adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+                                               (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
 
-       WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL,
-               ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
-               VCN_JPEG_DB_CTRL__EN_MASK);
+               WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL,
+                       ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+                       VCN_JPEG_DB_CTRL__EN_MASK);
 
-       r = amdgpu_ring_test_helper(ring);
-       if (r)
-               return r;
+               r = amdgpu_ring_test_helper(ring);
+               if (r)
+                       return r;
+       }
 
        DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n");
 
@@ -181,11 +196,11 @@ static int jpeg_v4_0_hw_fini(void *handle)
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
        cancel_delayed_work_sync(&adev->vcn.idle_work);
-
-       if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
-             RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS))
-               jpeg_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
-
+       if (!amdgpu_sriov_vf(adev)) {
+               if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
+                       RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS))
+                       jpeg_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+       }
        amdgpu_irq_put(adev, &adev->jpeg.inst->irq, 0);
 
        return 0;
@@ -390,6 +405,120 @@ static int jpeg_v4_0_start(struct amdgpu_device *adev)
        return 0;
 }
 
+static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev)
+{
+       struct amdgpu_ring *ring;
+       uint64_t ctx_addr;
+       uint32_t param, resp, expected;
+       uint32_t tmp, timeout;
+
+       struct amdgpu_mm_table *table = &adev->virt.mm_table;
+       uint32_t *table_loc;
+       uint32_t table_size;
+       uint32_t size, size_dw;
+       uint32_t init_status;
+
+       struct mmsch_v4_0_cmd_direct_write
+               direct_wt = { {0} };
+       struct mmsch_v4_0_cmd_end end = { {0} };
+       struct mmsch_v4_0_init_header header;
+
+       direct_wt.cmd_header.command_type =
+               MMSCH_COMMAND__DIRECT_REG_WRITE;
+       end.cmd_header.command_type =
+               MMSCH_COMMAND__END;
+
+       header.version = MMSCH_VERSION;
+       header.total_size = sizeof(struct mmsch_v4_0_init_header) >> 2;
+
+       header.jpegdec.init_status = 0;
+       header.jpegdec.table_offset = 0;
+       header.jpegdec.table_size = 0;
+
+       table_loc = (uint32_t *)table->cpu_addr;
+       table_loc += header.total_size;
+
+       table_size = 0;
+
+       ring = &adev->jpeg.inst->ring_dec;
+
+       MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(JPEG, 0,
+               regUVD_LMI_JRBC_RB_64BIT_BAR_LOW),
+               lower_32_bits(ring->gpu_addr));
+       MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(JPEG, 0,
+               regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH),
+               upper_32_bits(ring->gpu_addr));
+       MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(JPEG, 0,
+               regUVD_JRBC_RB_SIZE), ring->ring_size / 4);
+
+       /* add end packet */
+       MMSCH_V4_0_INSERT_END();
+
+       /* refine header */
+       header.jpegdec.init_status = 0;
+       header.jpegdec.table_offset = header.total_size;
+       header.jpegdec.table_size = table_size;
+       header.total_size += table_size;
+
+       /* Update init table header in memory */
+       size = sizeof(struct mmsch_v4_0_init_header);
+       table_loc = (uint32_t *)table->cpu_addr;
+       memcpy((void *)table_loc, &header, size);
+
+       /* message MMSCH (in VCN[0]) to initialize this client
+        * 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr
+        * of memory descriptor location
+        */
+       ctx_addr = table->gpu_addr;
+       WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+       WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+       /* 2, update vmid of descriptor */
+       tmp = RREG32_SOC15(VCN, 0, regMMSCH_VF_VMID);
+       tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+       /* use domain0 for MM scheduler */
+       tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+       WREG32_SOC15(VCN, 0, regMMSCH_VF_VMID, tmp);
+
+       /* 3, notify mmsch about the size of this descriptor */
+       size = header.total_size;
+       WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_SIZE, size);
+
+       /* 4, set resp to zero */
+       WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP, 0);
+
+       /* 5, kick off the initialization and wait until
+        * MMSCH_VF_MAILBOX_RESP becomes non-zero
+        */
+       param = 0x00000001;
+       WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_HOST, param);
+       tmp = 0;
+       timeout = 1000;
+       resp = 0;
+       expected = MMSCH_VF_MAILBOX_RESP__OK;
+       init_status = ((struct mmsch_v4_0_init_header *)(table_loc))->jpegdec.init_status;
+       while (resp != expected) {
+               resp = RREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP);
+
+               if (resp != 0)
+                       break;
+               udelay(10);
+               tmp = tmp + 10;
+               if (tmp >= timeout) {
+                       DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+                               " waiting for regMMSCH_VF_MAILBOX_RESP "\
+                               "(expected=0x%08x, readback=0x%08x)\n",
+                               tmp, expected, resp);
+                       return -EBUSY;
+               }
+       }
+       if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE && init_status != MMSCH_VF_ENGINE_STATUS__PASS)
+               DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n", resp, init_status);
+
+       return 0;
+
+}
+
 /**
  * jpeg_v4_0_stop - stop JPEG block
  *
@@ -513,6 +642,11 @@ static int jpeg_v4_0_set_powergating_state(void *handle,
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int ret;
 
+       if (amdgpu_sriov_vf(adev)) {
+               adev->jpeg.cur_state = AMD_PG_STATE_UNGATE;
+               return 0;
+       }
+
        if (state == adev->jpeg.cur_state)
                return 0;
 
index 0312c71..83653a5 100644 (file)
 #define MMSCH_VF_MAILBOX_RESP__OK 0x1
 #define MMSCH_VF_MAILBOX_RESP__INCOMPLETE 0x2
 
+#define MMSCH_VF_ENGINE_STATUS__PASS 0x1
+
+#define MMSCH_VF_MAILBOX_RESP__OK 0x1
+#define MMSCH_VF_MAILBOX_RESP__INCOMPLETE 0x2
+
 enum mmsch_v4_0_command_type {
        MMSCH_COMMAND__DIRECT_REG_WRITE = 0,
        MMSCH_COMMAND__DIRECT_REG_POLLING = 2,