drm/amdgpu: skip resetting other devices in the same hive if it's an SRIOV VF

author Zhigang Luo <zhigang.luo@amd.com>

Fri, 26 Nov 2021 17:16:45 +0000 (12:16 -0500)

committer Alex Deucher <alexander.deucher@amd.com>

Mon, 13 Dec 2021 21:32:34 +0000 (16:32 -0500)
author Zhigang Luo <zhigang.luo@amd.com>
Fri, 26 Nov 2021 17:16:45 +0000 (12:16 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 13 Dec 2021 21:32:34 +0000 (16:32 -0500)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index a1c1446..25a9e52 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4747,7 +4747,7 @@ static int amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct amdgp
  {
         struct amdgpu_device *tmp_adev = NULL;
  
-       if (adev->gmc.xgmi.num_physical_nodes > 1) {
+       if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
                 if (!hive) {
                         dev_err(adev->dev, "Hive is NULL while device has multiple xgmi nodes");
                         return -ENODEV;
@@ -4959,7 +4959,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
          * We always reset all schedulers for device and all devices for XGMI
          * hive so that should take care of them too.
          */
-       hive = amdgpu_get_xgmi_hive(adev);
+       if (!amdgpu_sriov_vf(adev))
+               hive = amdgpu_get_xgmi_hive(adev);
         if (hive) {
                 if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
                         DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
@@ -5000,7 +5001,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
          * to put adev in the 1st position.
          */
         INIT_LIST_HEAD(&device_list);
-       if (adev->gmc.xgmi.num_physical_nodes > 1) {
+       if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
                 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
                         list_add_tail(&tmp_adev->reset_list, &device_list);
                 if (!list_is_first(&adev->reset_list, &device_list))
author	Zhigang Luo <zhigang.luo@amd.com>
	Fri, 26 Nov 2021 17:16:45 +0000 (12:16 -0500)
committer	Alex Deucher <alexander.deucher@amd.com>
	Mon, 13 Dec 2021 21:32:34 +0000 (16:32 -0500)