OSDN Git Service

drm/amdkfd: fix set kfd node ras properties value
author: Stanley.Yang <Stanley.Yang@amd.com>
Mon, 17 Aug 2020 07:48:21 +0000 (15:48 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Wed, 26 Aug 2020 20:40:19 +0000 (16:40 -0400)
The ctx->features field belongs to the new RAS implementation,
which is only available on Vega20 and onwards. It is not
available on Vega10, so Vega10 should follow the legacy
ECC implementation.

Changed from V1:
    wrap function to initialize kfd node properties

Changed from V2:
    remove wrap function and SDMA SRAM ECC check

Signed-off-by: Stanley.Yang <Stanley.Yang@amd.com>
Reviewed-by: Guchun Chen <guchun.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdkfd/kfd_topology.c

index 3a9d102..4009d2e 100644 (file)
@@ -986,6 +986,7 @@ struct amdgpu_device {
 
        atomic_t                        throttling_logging_enabled;
        struct ratelimit_state          throttling_logging_rs;
+       uint32_t                        ras_features;
 };
 
 static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
index 9b71874..e5ea147 100644 (file)
@@ -1963,6 +1963,17 @@ int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
        return 0;
 }
 
+static int amdgpu_ras_check_asic_type(struct amdgpu_device *adev)
+{
+       if (adev->asic_type != CHIP_VEGA10 &&
+               adev->asic_type != CHIP_VEGA20 &&
+               adev->asic_type != CHIP_ARCTURUS &&
+               adev->asic_type != CHIP_SIENNA_CICHLID)
+               return 1;
+       else
+               return 0;
+}
+
 /*
  * check hardware's ras ability which will be saved in hw_supported.
  * if hardware does not support ras, we can skip some ras initializtion and
@@ -1979,9 +1990,7 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
        *supported = 0;
 
        if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||
-           (adev->asic_type != CHIP_VEGA20   &&
-            adev->asic_type != CHIP_ARCTURUS &&
-            adev->asic_type != CHIP_SIENNA_CICHLID))
+               amdgpu_ras_check_asic_type(adev))
                return;
 
        if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
@@ -2003,6 +2012,7 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
 
        *supported = amdgpu_ras_enable == 0 ?
                        0 : *hw_supported & amdgpu_ras_mask;
+       adev->ras_features = *supported;
 }
 
 int amdgpu_ras_init(struct amdgpu_device *adev)
@@ -2025,9 +2035,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 
        amdgpu_ras_check_supported(adev, &con->hw_supported,
                        &con->supported);
-       if (!con->hw_supported) {
+       if (!con->hw_supported || (adev->asic_type == CHIP_VEGA10)) {
                r = 0;
-               goto err_out;
+               goto release_con;
        }
 
        con->features = 0;
@@ -2038,25 +2048,25 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
        if (adev->nbio.funcs->init_ras_controller_interrupt) {
                r = adev->nbio.funcs->init_ras_controller_interrupt(adev);
                if (r)
-                       goto err_out;
+                       goto release_con;
        }
 
        if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) {
                r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev);
                if (r)
-                       goto err_out;
+                       goto release_con;
        }
 
        if (amdgpu_ras_fs_init(adev)) {
                r = -EINVAL;
-               goto err_out;
+               goto release_con;
        }
 
        dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
                        "hardware ability[%x] ras_mask[%x]\n",
                        con->hw_supported, con->supported);
        return 0;
-err_out:
+release_con:
        amdgpu_ras_set_context(adev, NULL);
        kfree(con);
 
index 5e8eb78..2b31c30 100644 (file)
@@ -1239,7 +1239,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
        void *crat_image = NULL;
        size_t image_size = 0;
        int proximity_domain;
-       struct amdgpu_ras *ctx;
+       struct amdgpu_device *adev;
 
        INIT_LIST_HEAD(&temp_topology_device_list);
 
@@ -1404,19 +1404,17 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
                dev->node_props.max_waves_per_simd = 10;
        }
 
-       ctx = amdgpu_ras_get_context((struct amdgpu_device *)(dev->gpu->kgd));
-       if (ctx) {
-               /* kfd only concerns sram ecc on GFX/SDMA and HBM ecc on UMC */
-               dev->node_props.capability |=
-                       (((ctx->features & BIT(AMDGPU_RAS_BLOCK__SDMA)) != 0) ||
-                        ((ctx->features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0)) ?
-                       HSA_CAP_SRAM_EDCSUPPORTED : 0;
-               dev->node_props.capability |= ((ctx->features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
-                       HSA_CAP_MEM_EDCSUPPORTED : 0;
-
-               dev->node_props.capability |= (ctx->features != 0) ?
+       adev = (struct amdgpu_device *)(dev->gpu->kgd);
+       /* kfd only concerns sram ecc on GFX and HBM ecc on UMC */
+       dev->node_props.capability |=
+               ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
+               HSA_CAP_SRAM_EDCSUPPORTED : 0;
+       dev->node_props.capability |= ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
+               HSA_CAP_MEM_EDCSUPPORTED : 0;
+
+       if (adev->asic_type != CHIP_VEGA10)
+               dev->node_props.capability |= (adev->ras_features != 0) ?
                        HSA_CAP_RASEVENTNOTIFY : 0;
-       }
 
        kfd_debug_print_topology();