OSDN Git Service

drm/amdgpu: revise RLCG access path
authorMonk Liu <Monk.Liu@amd.com>
Tue, 10 Mar 2020 12:28:45 +0000 (20:28 +0800)
committerAlex Deucher <alexander.deucher@amd.com>
Mon, 16 Mar 2020 20:17:55 +0000 (16:17 -0400)
what changed:
1)provide new implementation interface for the rlcg access path
2)put SQ_CMD/SQ_IND_INDEX to GFX9 RLCG path to let debugfs's reg_op
function can access reg that need RLCG path help

now even debugfs's reg_op can used to dump wave.

tested-by: Monk Liu <monk.liu@amd.com>
tested-by: Zhou pengju <pengju.zhou@amd.com>
Signed-off-by: Zhou pengju <pengju.zhou@amd.com>
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Emily Deng <Emily.Deng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/amdgpu/soc15.h
drivers/gpu/drm/amd/amdgpu/soc15_common.h

index 9e90cba..2992a49 100644 (file)
@@ -994,6 +994,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
                        uint32_t acc_flags);
 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
                    uint32_t acc_flags);
+void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
+                   uint32_t acc_flags);
 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);
 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
 
index 02bb1be..c0f9a65 100644 (file)
@@ -179,7 +179,7 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
                } else {
                        r = get_user(value, (uint32_t *)buf);
                        if (!r)
-                               WREG32(*pos >> 2, value);
+                               amdgpu_mm_wreg_mmio_rlc(adev, *pos >> 2, value, 0);
                }
                if (r) {
                        result = r;
index d6751b3..6f469fa 100644 (file)
@@ -306,6 +306,26 @@ void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
                BUG();
 }
 
+void static inline amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags)
+{
+       trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
+
+       if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
+               writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
+       else {
+               unsigned long flags;
+
+               spin_lock_irqsave(&adev->mmio_idx_lock, flags);
+               writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
+               writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
+               spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+       }
+
+       if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
+               udelay(500);
+       }
+}
+
 /**
  * amdgpu_mm_wreg - write to a memory mapped IO register
  *
@@ -319,8 +339,6 @@ void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
                    uint32_t acc_flags)
 {
-       trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
-
        if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
                adev->last_mm_index = v;
        }
@@ -328,20 +346,26 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
        if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
                return amdgpu_kiq_wreg(adev, reg, v);
 
-       if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
-               writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
-       else {
-               unsigned long flags;
+       amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
+}
 
-               spin_lock_irqsave(&adev->mmio_idx_lock, flags);
-               writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
-               writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
-               spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
-       }
+/*
+ * amdgpu_mm_wreg_mmio_rlc -  write register either with mmio or with RLC path if in range
+ *
+ * this function is invoked only the debugfs register access
+ * */
+void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
+                   uint32_t acc_flags)
+{
+       if (amdgpu_sriov_fullaccess(adev) &&
+               adev->gfx.rlc.funcs &&
+               adev->gfx.rlc.funcs->is_rlcg_access_range) {
 
-       if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
-               udelay(500);
+               if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
+                       return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
        }
+
+       amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
 }
 
 /**
index 52509c2..60bb3e8 100644 (file)
@@ -127,6 +127,8 @@ struct amdgpu_rlc_funcs {
        void (*reset)(struct amdgpu_device *adev);
        void (*start)(struct amdgpu_device *adev);
        void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid);
+       void (*rlcg_wreg)(struct amdgpu_device *adev, u32 offset, u32 v);
+       bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg);
 };
 
 struct amdgpu_rlc {
index daaf909..f0128f7 100644 (file)
@@ -270,6 +270,9 @@ struct amdgpu_virt {
 #define amdgpu_sriov_runtime(adev) \
 ((adev)->virt.caps & AMDGPU_SRIOV_CAPS_RUNTIME)
 
+#define amdgpu_sriov_fullaccess(adev) \
+(amdgpu_sriov_vf((adev)) && !amdgpu_sriov_runtime((adev)))
+
 #define amdgpu_passthrough(adev) \
 ((adev)->virt.caps & AMDGPU_PASSTHROUGH_MODE)
 
index 614e910..89a28ab 100644 (file)
@@ -224,6 +224,49 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000)
 };
 
+static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
+{
+       static void *scratch_reg0;
+       static void *scratch_reg1;
+       static void *scratch_reg2;
+       static void *scratch_reg3;
+       static void *spare_int;
+       static uint32_t grbm_cntl;
+       static uint32_t grbm_idx;
+       uint32_t i = 0;
+       uint32_t retries = 50000;
+
+       scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
+       scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
+       scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
+       scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
+       spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
+
+       grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
+       grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;
+
+       if (amdgpu_sriov_runtime(adev)) {
+               pr_err("shoudn't call rlcg write register during runtime\n");
+               return;
+       }
+
+       writel(v, scratch_reg0);
+       writel(offset | 0x80000000, scratch_reg1);
+       writel(1, spare_int);
+       for (i = 0; i < retries; i++) {
+               u32 tmp;
+
+               tmp = readl(scratch_reg1);
+               if (!(tmp & 0x80000000))
+                       break;
+
+               udelay(10);
+       }
+
+       if (i >= retries)
+               pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
+}
+
 static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] =
 {
        /* Pending on emulation bring up */
@@ -4247,6 +4290,33 @@ static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
        WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
 }
 
+static bool gfx_v10_0_check_rlcg_range(struct amdgpu_device *adev,
+                                       uint32_t offset,
+                                       struct soc15_reg_rlcg *entries, int arr_size)
+{
+       int i;
+       uint32_t reg;
+
+       if (!entries)
+               return false;
+
+       for (i = 0; i < arr_size; i++) {
+               const struct soc15_reg_rlcg *entry;
+
+               entry = &entries[i];
+               reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
+               if (offset == reg)
+                       return true;
+       }
+
+       return false;
+}
+
+static bool gfx_v10_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
+{
+       return gfx_v10_0_check_rlcg_range(adev, offset, NULL, 0);
+}
+
 static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = {
        .is_rlc_enabled = gfx_v10_0_is_rlc_enabled,
        .set_safe_mode = gfx_v10_0_set_safe_mode,
@@ -4258,7 +4328,9 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = {
        .stop = gfx_v10_0_rlc_stop,
        .reset = gfx_v10_0_rlc_reset,
        .start = gfx_v10_0_rlc_start,
-       .update_spm_vmid = gfx_v10_0_update_spm_vmid
+       .update_spm_vmid = gfx_v10_0_update_spm_vmid,
+       .rlcg_wreg = gfx_v10_rlcg_wreg,
+       .is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range,
 };
 
 static int gfx_v10_0_set_powergating_state(void *handle,
index d5afd7a..ce02245 100644 (file)
@@ -697,6 +697,11 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
 };
 
+static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
+       {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
+       {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
+};
+
 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
 {
        mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
@@ -721,6 +726,63 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
        mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 };
 
+void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
+{
+       static void *scratch_reg0;
+       static void *scratch_reg1;
+       static void *scratch_reg2;
+       static void *scratch_reg3;
+       static void *spare_int;
+       static uint32_t grbm_cntl;
+       static uint32_t grbm_idx;
+       bool shadow;
+
+       scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
+       scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
+       scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
+       scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
+       spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
+
+       grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
+       grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;
+
+       if (amdgpu_sriov_runtime(adev)) {
+               pr_err("shoudn't call rlcg write register during runtime\n");
+               return;
+       }
+
+       if (offset == grbm_cntl || offset == grbm_idx)
+               shadow = true;
+
+       if (shadow) {
+               if (offset  == grbm_cntl)
+                       writel(v, scratch_reg2);
+               else if (offset == grbm_idx)
+                       writel(v, scratch_reg3);
+
+               writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
+       } else {
+               uint32_t i = 0;
+               uint32_t retries = 50000;
+
+               writel(v, scratch_reg0);
+               writel(offset | 0x80000000, scratch_reg1);
+               writel(1, spare_int);
+               for (i = 0; i < retries; i++) {
+                       u32 tmp;
+
+                       tmp = readl(scratch_reg1);
+                       if (!(tmp & 0x80000000))
+                               break;
+
+                       udelay(10);
+               }
+               if (i >= retries)
+                       pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
+       }
+
+}
+
 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
@@ -1921,7 +1983,7 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
 
 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
 {
-       WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
+       WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
                (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
                (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
                (address << SQ_IND_INDEX__INDEX__SHIFT) |
@@ -1933,7 +1995,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
                           uint32_t wave, uint32_t thread,
                           uint32_t regno, uint32_t num, uint32_t *out)
 {
-       WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
+       WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
                (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
                (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
                (regno << SQ_IND_INDEX__INDEX__SHIFT) |
@@ -4908,6 +4970,35 @@ static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
        WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
 }
 
+static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
+                                       uint32_t offset,
+                                       struct soc15_reg_rlcg *entries, int arr_size)
+{
+       int i;
+       uint32_t reg;
+
+       if (!entries)
+               return false;
+
+       for (i = 0; i < arr_size; i++) {
+               const struct soc15_reg_rlcg *entry;
+
+               entry = &entries[i];
+               reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
+               if (offset == reg)
+                       return true;
+       }
+
+       return false;
+}
+
+static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
+{
+       return gfx_v9_0_check_rlcg_range(adev, offset,
+                                       (void *)rlcg_access_gc_9_0,
+                                       ARRAY_SIZE(rlcg_access_gc_9_0));
+}
+
 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
        .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
        .set_safe_mode = gfx_v9_0_set_safe_mode,
@@ -4920,7 +5011,9 @@ static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
        .stop = gfx_v9_0_rlc_stop,
        .reset = gfx_v9_0_rlc_reset,
        .start = gfx_v9_0_rlc_start,
-       .update_spm_vmid = gfx_v9_0_update_spm_vmid
+       .update_spm_vmid = gfx_v9_0_update_spm_vmid,
+       .rlcg_wreg = gfx_v9_0_rlcg_wreg,
+       .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
 };
 
 static int gfx_v9_0_set_powergating_state(void *handle,
index d0fb7a6..b03f950 100644 (file)
@@ -42,6 +42,13 @@ struct soc15_reg_golden {
        u32     or_mask;
 };
 
+struct soc15_reg_rlcg {
+       u32     hwip;
+       u32     instance;
+       u32     segment;
+       u32     reg;
+};
+
 struct soc15_reg_entry {
        uint32_t hwip;
        uint32_t inst;
index 19e870c..c893c64 100644 (file)
                }                                               \
        } while (0)
 
-#define AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(a) (amdgpu_sriov_vf((a)) && !amdgpu_sriov_runtime((a)))
 #define WREG32_RLC(reg, value) \
        do {                                                    \
-               if (AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(adev)) {    \
+               if (amdgpu_sriov_fullaccess(adev)) {    \
                        uint32_t i = 0; \
                        uint32_t retries = 50000;       \
                        uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0;   \
@@ -98,7 +97,7 @@
 #define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \
        do {                                                    \
                uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
-               if (AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(adev)) {    \
+               if (amdgpu_sriov_fullaccess(adev)) {    \
                        uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2;   \
                        uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3;   \
                        uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;   \