OSDN Git Service

drm/amdgpu: clear uncorrectable parity error status bit
author Le Ma <le.ma@amd.com>
Fri, 22 Nov 2019 10:39:11 +0000 (18:39 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 5 Dec 2019 21:25:35 +0000 (16:25 -0500)
The uncorrectable parity error status bit should be cleared during every NBIF uncorrectable error cleanup.

Signed-off-by: Le Ma <le.ma@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c

index 9a3a65a..bb701db 100644 (file)
@@ -482,10 +482,12 @@ static int nbio_v7_4_init_ras_err_event_athub_interrupt (struct amdgpu_device *a
        return 0;
 }
 
+#define smnPARITY_ERROR_STATUS_UNCORR_GRP2     0x13a20030
+
 static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
                                        void *ras_error_status)
 {
-       uint32_t global_sts, central_sts, int_eoi;
+       uint32_t global_sts, central_sts, int_eoi, parity_sts;
        uint32_t corr, fatal, non_fatal;
        struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
 
@@ -494,6 +496,7 @@ static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
        fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, ParityErrFatal);
        non_fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO,
                                ParityErrNonFatal);
+       parity_sts = RREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2);
 
        if (corr)
                err_data->ce_count++;
@@ -505,6 +508,11 @@ static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
                /* clear error status register */
                WREG32_PCIE(smnRAS_GLOBAL_STATUS_LO, global_sts);
 
+               if (fatal)
+                       /* clear parity fatal error indication field */
+                       WREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2,
+                                   parity_sts);
+
                if (REG_GET_FIELD(central_sts, BIFL_RAS_CENTRAL_STATUS,
                                BIFL_RasContller_Intr_Recv)) {
                        /* clear interrupt status register */