OSDN Git Service

drm/amdgpu: Put drm_dev_enter/exit outside hot codepath
authorxinhui pan <xinhui.pan@amd.com>
Wed, 15 Sep 2021 01:08:28 +0000 (09:08 +0800)
committerAlex Deucher <alexander.deucher@amd.com>
Thu, 23 Sep 2021 19:17:29 +0000 (15:17 -0400)
We hit soft hang while doing memory pressure test on one numa system.
After a qucik look, this is because kfd invalid/valid userptr memory
frequently with process_info lock hold.
Looks like update page table mapping use too much cpu time.

perf top says below,
75.81%  [kernel]       [k] __srcu_read_unlock
 6.19%  [amdgpu]       [k] amdgpu_gmc_set_pte_pde
 3.56%  [kernel]       [k] __srcu_read_lock
 2.20%  [amdgpu]       [k] amdgpu_vm_cpu_update
 2.20%  [kernel]       [k] __sg_page_iter_dma_next
 2.15%  [drm]          [k] drm_dev_enter
 1.70%  [drm]          [k] drm_prime_sg_to_dma_addr_array
 1.18%  [kernel]       [k] __sg_alloc_table_from_pages
 1.09%  [drm]          [k] drm_dev_exit

So move drm_dev_enter/exit outside gmc code, instead let caller do it.
They are gart_unbind, gart_map, vm_clear_bo, vm_update_pdes and
gmc_init_pdb0. vm_bo_update_mapping already calls it.

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
Reviewed-and-tested-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 76efd5f..d7e4f46 100644 (file)
@@ -34,6 +34,7 @@
 #include <asm/set_memory.h>
 #endif
 #include "amdgpu.h"
+#include <drm/drm_drv.h>
 
 /*
  * GART
@@ -230,12 +231,16 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
        u64 page_base;
        /* Starting from VEGA10, system bit must be 0 to mean invalid. */
        uint64_t flags = 0;
+       int idx;
 
        if (!adev->gart.ready) {
                WARN(1, "trying to unbind memory from uninitialized GART !\n");
                return -EINVAL;
        }
 
+       if (!drm_dev_enter(&adev->ddev, &idx))
+               return 0;
+
        t = offset / AMDGPU_GPU_PAGE_SIZE;
        p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
        for (i = 0; i < pages; i++, p++) {
@@ -254,6 +259,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
        for (i = 0; i < adev->num_vmhubs; i++)
                amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
 
+       drm_dev_exit(idx);
        return 0;
 }
 
@@ -276,12 +282,16 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
 {
        uint64_t page_base;
        unsigned i, j, t;
+       int idx;
 
        if (!adev->gart.ready) {
                WARN(1, "trying to bind memory to uninitialized GART !\n");
                return -EINVAL;
        }
 
+       if (!drm_dev_enter(&adev->ddev, &idx))
+               return 0;
+
        t = offset / AMDGPU_GPU_PAGE_SIZE;
 
        for (i = 0; i < pages; i++) {
@@ -291,6 +301,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
                        page_base += AMDGPU_GPU_PAGE_SIZE;
                }
        }
+       drm_dev_exit(idx);
        return 0;
 }
 
index 9ff600a..a0dec7f 100644 (file)
@@ -153,10 +153,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
 {
        void __iomem *ptr = (void *)cpu_pt_addr;
        uint64_t value;
-       int idx;
-
-       if (!drm_dev_enter(&adev->ddev, &idx))
-               return 0;
 
        /*
         * The following is for PTE only. GART does not have PDEs.
@@ -165,8 +161,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
        value |= flags;
        writeq(value, ptr + (gpu_page_idx * 8));
 
-       drm_dev_exit(idx);
-
        return 0;
 }
 
@@ -749,6 +743,10 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
                adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
        u64 vram_end = vram_addr + vram_size;
        u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
+       int idx;
+
+       if (!drm_dev_enter(&adev->ddev, &idx))
+               return;
 
        flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
        flags |= AMDGPU_PTE_WRITEABLE;
@@ -770,6 +768,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
        flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
        /* Requires gart_ptb_gpu_pa to be 4K aligned */
        amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
+       drm_dev_exit(idx);
 }
 
 /**
index 6b15cad..a1ddf74 100644 (file)
@@ -800,7 +800,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
        struct amdgpu_bo *bo = &vmbo->bo;
        unsigned entries, ats_entries;
        uint64_t addr;
-       int r;
+       int r, idx;
 
        /* Figure out our place in the hierarchy */
        if (ancestor->parent) {
@@ -845,9 +845,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
                        return r;
        }
 
+       if (!drm_dev_enter(&adev->ddev, &idx))
+               return -ENODEV;
+
        r = vm->update_funcs->map_table(vmbo);
        if (r)
-               return r;
+               goto exit;
 
        memset(&params, 0, sizeof(params));
        params.adev = adev;
@@ -856,7 +859,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 
        r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
        if (r)
-               return r;
+               goto exit;
 
        addr = 0;
        if (ats_entries) {
@@ -872,7 +875,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
                r = vm->update_funcs->update(&params, vmbo, addr, 0, ats_entries,
                                             value, flags);
                if (r)
-                       return r;
+                       goto exit;
 
                addr += ats_entries * 8;
        }
@@ -895,10 +898,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
                r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
                                             value, flags);
                if (r)
-                       return r;
+                       goto exit;
        }
 
-       return vm->update_funcs->commit(&params, NULL);
+       r = vm->update_funcs->commit(&params, NULL);
+exit:
+       drm_dev_exit(idx);
+       return r;
 }
 
 /**
@@ -1384,11 +1390,14 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
                          struct amdgpu_vm *vm, bool immediate)
 {
        struct amdgpu_vm_update_params params;
-       int r;
+       int r, idx;
 
        if (list_empty(&vm->relocated))
                return 0;
 
+       if (!drm_dev_enter(&adev->ddev, &idx))
+               return -ENODEV;
+
        memset(&params, 0, sizeof(params));
        params.adev = adev;
        params.vm = vm;
@@ -1396,7 +1405,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
 
        r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
        if (r)
-               return r;
+               goto exit;
 
        while (!list_empty(&vm->relocated)) {
                struct amdgpu_vm_bo_base *entry;
@@ -1414,10 +1423,13 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
        r = vm->update_funcs->commit(&params, &vm->last_update);
        if (r)
                goto error;
+       drm_dev_exit(idx);
        return 0;
 
 error:
        amdgpu_vm_invalidate_pds(adev, vm);
+exit:
+       drm_dev_exit(idx);
        return r;
 }