drm/amdgpu: initialize ras structures for xgmi block (v2)

author Hawking Zhang <Hawking.Zhang@amd.com>

Tue, 10 Sep 2019 03:13:39 +0000 (11:13 +0800)

committer Alex Deucher <alexander.deucher@amd.com>

Mon, 16 Sep 2019 15:08:51 +0000 (10:08 -0500)
author Hawking Zhang <Hawking.Zhang@amd.com>
Tue, 10 Sep 2019 03:13:39 +0000 (11:13 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 16 Sep 2019 15:08:51 +0000 (10:08 -0500)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h

index d3be51b..b36d4c6 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -123,6 +123,7 @@ struct amdgpu_xgmi {
         /* gpu list in the same hive */
         struct list_head head;
         bool supported;
+       struct ras_common_if *ras_if;
  };
  
  struct amdgpu_gmc {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c

index 65aae75..7f6f2e9 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -25,6 +25,7 @@
  #include "amdgpu.h"
  #include "amdgpu_xgmi.h"
  #include "amdgpu_smu.h"
+#include "amdgpu_ras.h"
  #include "df/df_3_6_offset.h"
  
  static DEFINE_MUTEX(xgmi_mutex);
@@ -437,3 +438,38 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
                 mutex_unlock(&hive->hive_lock);
         }
  }
+/* Set up the RAS interface (ras_if) for the XGMI/WAFL block at late init. */
+int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
+{
+       int r;
+       struct ras_ih_if ih_info = {
+               .cb = NULL, /* no callback is supplied for this block */
+       };
+       struct ras_fs_if fs_info = {
+               .sysfs_name = "xgmi_wafl_err_count",
+               .debugfs_name = "xgmi_wafl_err_inject",
+       };
+
+       if (!adev->gmc.xgmi.supported ||
+           adev->gmc.xgmi.num_physical_nodes == 0)
+               return 0; /* XGMI unsupported or no physical nodes: nothing to set up */
+
+       if (!adev->gmc.xgmi.ras_if) { /* allocate and fill ras_if only if not already present */
+               adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+               if (!adev->gmc.xgmi.ras_if)
+                       return -ENOMEM;
+               adev->gmc.xgmi.ras_if->block = AMDGPU_RAS_BLOCK__XGMI_WAFL;
+               adev->gmc.xgmi.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+               adev->gmc.xgmi.ras_if->sub_block_index = 0;
+               strcpy(adev->gmc.xgmi.ras_if->name, "xgmi_wafl");
+       }
+       ih_info.head = fs_info.head = *adev->gmc.xgmi.ras_if; /* both info structs get a copy of the common descriptor */
+       r = amdgpu_ras_late_init(adev, adev->gmc.xgmi.ras_if,
+                                &fs_info, &ih_info);
+       if (r || !amdgpu_ras_is_supported(adev, adev->gmc.xgmi.ras_if->block)) { /* init failed, or RAS not supported for this block */
+               kfree(adev->gmc.xgmi.ras_if);
+               adev->gmc.xgmi.ras_if = NULL; /* cleared so a later call allocates afresh */
+       }
+
+       return r; /* result of amdgpu_ras_late_init(); still 0 when ras_if was dropped only for lack of support */
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h

index fbcee31..9023789 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -42,6 +42,7 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
  int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
  int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
                 struct amdgpu_device *peer_adev);
+int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev);
  
  static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
                 struct amdgpu_device *bo_adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

index 167e916..a991a6c 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -55,6 +55,7 @@
  #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
  
  #include "amdgpu_ras.h"
+#include "amdgpu_xgmi.h"
  
  /* add these here since we already include dce12 headers and these are for DCN */
  #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION                                                          0x055d
@@ -808,7 +809,8 @@ static int gmc_v9_0_ecc_late_init(void *handle)
                 if (r)
                         return r;
         }
-       return 0;
+
+       return amdgpu_xgmi_ras_late_init(adev);
  }
  
  static int gmc_v9_0_late_init(void *handle)
author	Hawking Zhang <Hawking.Zhang@amd.com>
	Tue, 10 Sep 2019 03:13:39 +0000 (11:13 +0800)
committer	Alex Deucher <alexander.deucher@amd.com>
	Mon, 16 Sep 2019 15:08:51 +0000 (10:08 -0500)
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h		patch \| blob \| history
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c		patch \| blob \| history
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h		patch \| blob \| history
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c		patch \| blob \| history