OSDN Git Service

drm/amdgpu: Added RAS UMC error query support for Arcturus
author: John Clements <john.clements@amd.com>
Wed, 11 Dec 2019 02:18:55 +0000 (10:18 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Wed, 11 Dec 2019 20:22:07 +0000 (15:22 -0500)
Updated UMC 6.1 function set to support UMC 6.1.1 and 6.1.2 devices

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: John Clements <john.clements@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
drivers/gpu/drm/amd/amdgpu/umc_v6_1.h

index a208b28..53dfc82 100644 (file)
@@ -708,11 +708,18 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
                adev->umc.funcs = &umc_v6_0_funcs;
                break;
        case CHIP_VEGA20:
+               adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
+               adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
+               adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
+               adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
+               adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
+               adev->umc.funcs = &umc_v6_1_funcs;
+               break;
        case CHIP_ARCTURUS:
                adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
                adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
                adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
-               adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET;
+               adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
                adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
                adev->umc.funcs = &umc_v6_1_funcs;
                break;
index 47c4b96..515eb50 100644 (file)
 
 #define smnMCA_UMC0_MCUMC_ADDRT0       0x50f10
 
+/* UMC 6_1_2 register offsets */
+#define mmUMCCH0_0_EccErrCntSel_ARCT                 0x0360
+#define mmUMCCH0_0_EccErrCntSel_ARCT_BASE_IDX        1
+#define mmUMCCH0_0_EccErrCnt_ARCT                    0x0361
+#define mmUMCCH0_0_EccErrCnt_ARCT_BASE_IDX           1
+#define mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT           0x03c2
+#define mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT_BASE_IDX  1
+
 /*
  * (addr / 256) * 8192, the higher 26 bits in ErrorAddr
  * is the index of 8KB block
@@ -95,12 +103,25 @@ static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
        uint64_t mc_umc_status;
        uint32_t mc_umc_status_addr;
 
-       ecc_err_cnt_sel_addr =
-               SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
-       ecc_err_cnt_addr =
-               SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
-       mc_umc_status_addr =
-               SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+       if (adev->asic_type == CHIP_ARCTURUS) {
+               /* UMC 6_1_2 registers */
+
+               ecc_err_cnt_sel_addr =
+                       SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
+               ecc_err_cnt_addr =
+                       SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
+               mc_umc_status_addr =
+                       SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
+       } else {
+               /* UMC 6_1_1 registers */
+
+               ecc_err_cnt_sel_addr =
+                       SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
+               ecc_err_cnt_addr =
+                       SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
+               mc_umc_status_addr =
+                       SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+       }
 
        /* select the lower chip and check the error count */
        ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset);
@@ -141,8 +162,17 @@ static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev
        uint64_t mc_umc_status;
        uint32_t mc_umc_status_addr;
 
-       mc_umc_status_addr =
-                SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+       if (adev->asic_type == CHIP_ARCTURUS) {
+               /* UMC 6_1_2 registers */
+
+               mc_umc_status_addr =
+                       SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
+       } else {
+               /* UMC 6_1_1 registers */
+
+               mc_umc_status_addr =
+                       SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+       }
 
        /* check the MCUMC_STATUS */
        mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset);
@@ -179,8 +209,17 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
        uint64_t mc_umc_status, err_addr, retired_page;
        struct eeprom_table_record *err_rec;
 
-       mc_umc_status_addr =
-               SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+       if (adev->asic_type == CHIP_ARCTURUS) {
+               /* UMC 6_1_2 registers */
+
+               mc_umc_status_addr =
+                       SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
+       } else {
+               /* UMC 6_1_1 registers */
+
+               mc_umc_status_addr =
+                       SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+       }
 
        /* skip error address process if -ENOMEM */
        if (!err_data->err_addr) {
@@ -241,10 +280,21 @@ static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev,
        uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
        uint32_t ecc_err_cnt_addr;
 
-       ecc_err_cnt_sel_addr =
-               SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
-       ecc_err_cnt_addr =
-               SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
+       if (adev->asic_type == CHIP_ARCTURUS) {
+               /* UMC 6_1_2 registers */
+
+               ecc_err_cnt_sel_addr =
+                       SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
+               ecc_err_cnt_addr =
+                       SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
+       } else {
+               /* UMC 6_1_1 registers */
+
+               ecc_err_cnt_sel_addr =
+                       SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
+               ecc_err_cnt_addr =
+                       SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
+       }
 
        /* select the lower chip and check the error count */
        ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset);
index dab9cbd..0ce1d32 100644 (file)
@@ -35,7 +35,8 @@
 /* total channel instances in one umc block */
 #define UMC_V6_1_TOTAL_CHANNEL_NUM     (UMC_V6_1_CHANNEL_INSTANCE_NUM * UMC_V6_1_UMC_INSTANCE_NUM)
 /* UMC regiser per channel offset */
-#define UMC_V6_1_PER_CHANNEL_OFFSET            0x800
+#define UMC_V6_1_PER_CHANNEL_OFFSET_VG20       0x800
+#define UMC_V6_1_PER_CHANNEL_OFFSET_ARCT       0x400
 
 /* EccErrCnt max value */
 #define UMC_V6_1_CE_CNT_MAX            0xffff