OSDN Git Service

drm/amdgpu/vcn: Add VCN ras error query support
authorMohammad Zafar Ziya <Mohammadzafar.ziya@amd.com>
Wed, 23 Mar 2022 04:06:49 +0000 (12:06 +0800)
committerAlex Deucher <alexander.deucher@amd.com>
Mon, 28 Mar 2022 16:54:39 +0000 (12:54 -0400)
RAS error query support addition for VCN 2.6

V2: removed unused option and corrected comment format
Moved the register definition under header file

V3: poison query status check added.
Removed error query interface

V4: MMSCH poison check option removed, return true/false refactored.

Signed-off-by: Mohammad Zafar Ziya <Mohammadzafar.ziya@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h

index 1e1a3b7..606df88 100644 (file)
@@ -508,6 +508,7 @@ struct amdgpu_ras_block_hw_ops {
        void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status);
        void (*reset_ras_error_count)(struct amdgpu_device *adev);
        void (*reset_ras_error_status)(struct amdgpu_device *adev);
+       bool (*query_poison_status)(struct amdgpu_device *adev);
 };
 
 /* work flow
index 1bf6729..3e1de8c 100644 (file)
@@ -31,6 +31,7 @@
 #include "soc15d.h"
 #include "vcn_v2_0.h"
 #include "mmsch_v1_0.h"
+#include "vcn_v2_5.h"
 
 #include "vcn/vcn_2_5_offset.h"
 #include "vcn/vcn_2_5_sh_mask.h"
@@ -59,6 +60,7 @@ static int vcn_v2_5_set_powergating_state(void *handle,
 static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
                                int inst_idx, struct dpg_pause_state *new_state);
 static int vcn_v2_5_sriov_start(struct amdgpu_device *adev);
+static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev);
 
 static int amdgpu_ih_clientid_vcns[] = {
        SOC15_IH_CLIENTID_VCN,
@@ -100,6 +102,7 @@ static int vcn_v2_5_early_init(void *handle)
        vcn_v2_5_set_dec_ring_funcs(adev);
        vcn_v2_5_set_enc_ring_funcs(adev);
        vcn_v2_5_set_irq_funcs(adev);
+       vcn_v2_5_set_ras_funcs(adev);
 
        return 0;
 }
@@ -1932,3 +1935,71 @@ const struct amdgpu_ip_block_version vcn_v2_6_ip_block =
                .rev = 0,
                .funcs = &vcn_v2_6_ip_funcs,
 };
+
+static uint32_t vcn_v2_6_query_poison_by_instance(struct amdgpu_device *adev,
+                       uint32_t instance, uint32_t sub_block)
+{
+       uint32_t poison_stat = 0, reg_value = 0;
+
+       switch (sub_block) {
+       case AMDGPU_VCN_V2_6_VCPU_VCODEC:
+               reg_value = RREG32_SOC15(VCN, instance, mmUVD_RAS_VCPU_VCODEC_STATUS);
+               poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF);
+               break;
+       default:
+               break;
+       };
+
+       if (poison_stat)
+               dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n",
+                       instance, sub_block);
+
+       return poison_stat;
+}
+
+static bool vcn_v2_6_query_poison_status(struct amdgpu_device *adev)
+{
+       uint32_t inst, sub;
+       uint32_t poison_stat = 0;
+
+       for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++)
+               for (sub = 0; sub < AMDGPU_VCN_V2_6_MAX_SUB_BLOCK; sub++)
+                       poison_stat +=
+                       vcn_v2_6_query_poison_by_instance(adev, inst, sub);
+
+       return !!poison_stat;
+}
+
+const struct amdgpu_ras_block_hw_ops vcn_v2_6_ras_hw_ops = {
+       .query_poison_status = vcn_v2_6_query_poison_status,
+};
+
+static struct amdgpu_vcn_ras vcn_v2_6_ras = {
+       .ras_block = {
+               .hw_ops = &vcn_v2_6_ras_hw_ops,
+       },
+};
+
+static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev)
+{
+       switch (adev->ip_versions[VCN_HWIP][0]) {
+       case IP_VERSION(2, 6, 0):
+               adev->vcn.ras = &vcn_v2_6_ras;
+               break;
+       default:
+               break;
+       }
+
+       if (adev->vcn.ras) {
+               amdgpu_ras_register_ras_block(adev, &adev->vcn.ras->ras_block);
+
+               strcpy(adev->vcn.ras->ras_block.ras_comm.name, "vcn");
+               adev->vcn.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__VCN;
+               adev->vcn.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON;
+               adev->vcn.ras_if = &adev->vcn.ras->ras_block.ras_comm;
+
+               /* If don't define special ras_late_init function, use default ras_late_init */
+               if (!adev->vcn.ras->ras_block.ras_late_init)
+                       adev->vcn.ras->ras_block.ras_late_init = amdgpu_ras_block_late_init;
+       }
+}
index e72f799..1c19af7 100644 (file)
 #ifndef __VCN_V2_5_H__
 #define __VCN_V2_5_H__
 
+enum amdgpu_vcn_v2_6_sub_block {
+       AMDGPU_VCN_V2_6_VCPU_VCODEC = 0,
+
+       AMDGPU_VCN_V2_6_MAX_SUB_BLOCK,
+};
+
 extern const struct amdgpu_ip_block_version vcn_v2_5_ip_block;
 extern const struct amdgpu_ip_block_version vcn_v2_6_ip_block;