drm/amdgpu: Override MTYPE per page on GFXv9.4.3 APUs

author Felix Kuehling <Felix.Kuehling@amd.com>

Tue, 21 Feb 2023 22:44:18 +0000 (17:44 -0500)

committer Alex Deucher <alexander.deucher@amd.com>

Fri, 9 Jun 2023 13:59:08 +0000 (09:59 -0400)
author Felix Kuehling <Felix.Kuehling@amd.com>
Tue, 21 Feb 2023 22:44:18 +0000 (17:44 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 9 Jun 2023 13:59:08 +0000 (09:59 -0400)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h

index 43357d6..6794edd 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -148,6 +148,10 @@ struct amdgpu_gmc_funcs {
         void (*get_vm_pte)(struct amdgpu_device *adev,
                            struct amdgpu_bo_va_mapping *mapping,
                            uint64_t *flags);
+       /* override per-page pte flags */
+       void (*override_vm_pte_flags)(struct amdgpu_device *dev,
+                                     struct amdgpu_vm *vm,
+                                     uint64_t addr, uint64_t *flags);
         /* get the amount of memory used by the vbios for pre-OS console */
         unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
  
@@ -336,6 +340,9 @@ struct amdgpu_gmc {
  #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
  #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
  #define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags))
+#define amdgpu_gmc_override_vm_pte_flags(adev, vm, addr, pte_flags)    \
+       (adev)->gmc.gmc_funcs->override_vm_pte_flags                    \
+               ((adev), (vm), (addr), (pte_flags)) /* hook is called without a NULL check */
  #define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
  
  /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c

index bc5d126..60b1da9 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -786,13 +786,14 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
                                        uint64_t pe, uint64_t addr,
                                        unsigned int count, uint32_t incr,
                                        uint64_t flags)
-
  {
+       struct amdgpu_device *adev = params->adev;
+
         if (level != AMDGPU_VM_PTB) {
                 flags |= AMDGPU_PDE_PTE;
-               amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
+               amdgpu_gmc_get_vm_pde(adev, level, &addr, &flags);
  
-       } else if (params->adev->asic_type >= CHIP_VEGA10 &&
+       } else if (adev->asic_type >= CHIP_VEGA10 &&
                    !(flags & AMDGPU_PTE_VALID) &&
                    !(flags & AMDGPU_PTE_PRT)) {
  
@@ -800,6 +801,21 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
                 flags |= AMDGPU_PTE_EXECUTABLE;
         }
  
+       /* APUs mapping system memory may need different MTYPEs on different
+        * NUMA nodes. Only do this for contiguous ranges that can be assumed
+        * to be on the same NUMA node.
+        */
+       if ((flags & AMDGPU_PTE_SYSTEM) && (adev->flags & AMD_IS_APU) &&
+           adev->gmc.gmc_funcs->override_vm_pte_flags &&
+           num_possible_nodes() > 1) {
+               if (!params->pages_addr)
+                       amdgpu_gmc_override_vm_pte_flags(adev, params->vm,
+                                                        addr, &flags);
+               else
+                       dev_dbg(adev->dev,
+                               "override_vm_pte_flags skipped: non-contiguous\n");
+       }
+
         params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
                                          flags);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

index cf976b5..c64a69f 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1297,6 +1297,69 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
                                              mapping, flags);
  }
  
+static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
+                                          struct amdgpu_vm *vm,
+                                          uint64_t addr, uint64_t *flags)
+{
+       int local_node, nid;
+
+       /* Replace the MTYPE field in *flags with a "local" MTYPE when the page
+        * at @addr is RAM on the same NUMA node as the GPU. On every other
+        * path this function returns with *flags untouched. @vm is not used
+        * yet; it only appears in the commented-out mem_id lookups below.
+        *
+        * Only GFX 9.4.3 APUs associate GPUs with NUMA nodes. Local system
+        * memory can use more efficient MTYPEs.
+        */
+       if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3))
+               return;
+
+       /* Only direct-mapped memory allows us to determine the NUMA node from
+        * the DMA address. The checks further down shift @addr right by
+        * PAGE_SHIFT and use the result as a page frame number.
+        */
+       if (!adev->ram_is_direct_mapped) {
+               dev_dbg(adev->dev, "RAM is not direct mapped\n");
+               return;
+       }
+
+       /* Only override mappings with MTYPE_NC, which is the safe default for
+        * cacheable memory. Any other MTYPE already set in *flags is kept.
+        */
+       if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
+           AMDGPU_PTE_MTYPE_VG10(MTYPE_NC)) {
+               dev_dbg(adev->dev, "MTYPE is not NC\n");
+               return;
+       }
+
+       /* TODO: memory partitions. mem_id is hard-coded to 0 for now, so the
+        * local node is always read from mem_partitions[0].
+        * FIXME: Only supported on native mode for now. For carve-out, the
+        * NUMA affinity of the GPU/VM needs to come from the PCI info because
+        * memory partitions are not associated with different NUMA nodes.
+        */
+       if (adev->gmc.is_app_apu) {
+               local_node = adev->gmc.mem_partitions[/*vm->mem_id*/0].numa.node;
+       } else {
+               dev_dbg(adev->dev, "Only native mode APU is supported.\n");
+               return;
+       }
+
+       /* Only handle real RAM. Mappings of PCIe resources don't have struct
+        * page or NUMA nodes, so pfn_to_nid() below is only called once
+        * page_is_ram() has accepted the page frame.
+        */
+       if (!page_is_ram(addr >> PAGE_SHIFT)) {
+               dev_dbg(adev->dev, "Page is not RAM.\n");
+               return;
+       }
+       nid = pfn_to_nid(addr >> PAGE_SHIFT); /* node id looked up for this page frame */
+       dev_dbg(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n",
+               /*vm->mem_id*/0, local_node, nid);
+       if (nid == local_node) { /* remote pages keep the MTYPE_NC default */
+               unsigned int mtype_local = /* MTYPE_CC if amdgpu_use_mtype_cc_wa is set, else MTYPE_RW */
+                       amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW;
+               uint64_t old_flags = *flags; /* kept only for the debug print below */
+
+               *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | /* clear only the MTYPE bits */
+                        AMDGPU_PTE_MTYPE_VG10(mtype_local);
+               dev_dbg(adev->dev, "flags updated from %llx to %llx\n",
+                       old_flags, *flags);
+       }
+}
+
  static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
  {
         u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
@@ -1368,6 +1431,7 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
         .map_mtype = gmc_v9_0_map_mtype,
         .get_vm_pde = gmc_v9_0_get_vm_pde,
         .get_vm_pte = gmc_v9_0_get_vm_pte,
+       .override_vm_pte_flags = gmc_v9_0_override_vm_pte_flags,
         .get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size,
         .query_mem_partition_mode = &gmc_v9_0_query_memory_partition,
  };
author	Felix Kuehling <Felix.Kuehling@amd.com>
	Tue, 21 Feb 2023 22:44:18 +0000 (17:44 -0500)
committer	Alex Deucher <alexander.deucher@amd.com>
	Fri, 9 Jun 2023 13:59:08 +0000 (09:59 -0400)
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h		patch \| blob \| history
drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c		patch \| blob \| history
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c		patch \| blob \| history