drm/radeon: use an interval tree to manage the VMA v2
author    Alex Deucher <alexander.deucher@amd.com>
          Wed, 30 Jul 2014 15:49:56 +0000 (11:49 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
          Tue, 5 Aug 2014 12:53:51 +0000 (08:53 -0400)
Scales much better than scanning the address range linearly.

v2: store pfn instead of address

Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/Kconfig
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_gem.c
drivers/gpu/drm/radeon/radeon_trace.h
drivers/gpu/drm/radeon/radeon_vm.c

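For context: the patch replaces the per-VM linear list of mappings with the generic interval tree from <linux/interval_tree.h> (hence the new INTERVAL_TREE select in the Kconfig hunk below). A minimal sketch of that API, under illustrative names not taken from the patch -- each node stores an inclusive [start, last] range in an augmented rbtree, so overlap queries take O(log n) instead of a linear walk:

    #include <linux/interval_tree.h>
    #include <linux/kernel.h>

    /* Illustrative container; the patch embeds the node in radeon_bo_va. */
    struct mapping {
            struct interval_tree_node it;   /* inclusive [start, last] */
    };

    static struct rb_root root = RB_ROOT;

    /* Track an inclusive pfn range. */
    static void mapping_insert(struct mapping *m,
                               unsigned long first, unsigned long last)
    {
            m->it.start = first;
            m->it.last = last;
            interval_tree_insert(&m->it, &root);
    }

    /* First mapping overlapping [first, last], or NULL; bounds inclusive. */
    static struct mapping *mapping_find(unsigned long first, unsigned long last)
    {
            struct interval_tree_node *it;

            it = interval_tree_iter_first(&root, first, last);
            return it ? container_of(it, struct mapping, it) : NULL;
    }
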
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 31894c8..b066bb3 100644
@@ -114,6 +114,7 @@ config DRM_RADEON
        select POWER_SUPPLY
        select HWMON
        select BACKLIGHT_CLASS_DEVICE
+       select INTERVAL_TREE
        help
          Choose this option if you have an ATI Radeon graphics card.  There
          are both PCI and AGP versions.  You don't need to choose this to
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 31dda41..56fc7d2 100644
@@ -64,6 +64,7 @@
 #include <linux/wait.h>
 #include <linux/list.h>
 #include <linux/kref.h>
+#include <linux/interval_tree.h>
 
 #include <ttm/ttm_bo_api.h>
 #include <ttm/ttm_bo_driver.h>
@@ -447,14 +448,12 @@ struct radeon_mman {
 struct radeon_bo_va {
        /* protected by bo being reserved */
        struct list_head                bo_list;
-       uint64_t                        soffset;
-       uint64_t                        eoffset;
        uint32_t                        flags;
        uint64_t                        addr;
        unsigned                        ref_count;
 
        /* protected by vm mutex */
-       struct list_head                vm_list;
+       struct interval_tree_node       it;
        struct list_head                vm_status;
 
        /* constant after initialization */
@@ -877,7 +876,7 @@ struct radeon_vm_pt {
 };
 
 struct radeon_vm {
-       struct list_head                va;
+       struct rb_root                  va;
        unsigned                        id;
 
        /* BOs moved, but not yet updated in the PT */
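The v2 note ("store pfn instead of address") shows up in the struct change above: it.start and it.last hold GPU page frame numbers and form an inclusive range, where the removed soffset/eoffset were a byte offset and an exclusive end. The radeon_gem.c hunk below converts back to bytes for userspace, while the trace hunk keeps page units but restores an exclusive end. Roughly (field names from the patch, the conversion lines themselves are an illustrative summary):

    /* old byte-based view recovered from the new pfn-based node */
    soffset = bo_va->it.start * RADEON_GPU_PAGE_SIZE;
    eoffset = (bo_va->it.last + 1) * RADEON_GPU_PAGE_SIZE; /* exclusive end */

    /* and back, for an incoming byte range [soffset, eoffset) */
    first_pfn = soffset / RADEON_GPU_PAGE_SIZE;
    last_pfn = eoffset / RADEON_GPU_PAGE_SIZE - 1;         /* inclusive end */
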
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 99e4e0c..bfd7e1b 100644
@@ -496,9 +496,9 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
 
        switch (args->operation) {
        case RADEON_VA_MAP:
-               if (bo_va->soffset) {
+               if (bo_va->it.start) {
                        args->operation = RADEON_VA_RESULT_VA_EXIST;
-                       args->offset = bo_va->soffset;
+                       args->offset = bo_va->it.start * RADEON_GPU_PAGE_SIZE;
                        goto out;
                }
                r = radeon_vm_bo_set_addr(rdev, bo_va, args->offset, args->flags);
diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h
index cd781f3..9db74a9 100644
@@ -72,8 +72,8 @@ TRACE_EVENT(radeon_vm_bo_update,
                             ),
 
            TP_fast_assign(
-                          __entry->soffset = bo_va->soffset;
-                          __entry->eoffset = bo_va->eoffset;
+                          __entry->soffset = bo_va->it.start;
+                          __entry->eoffset = bo_va->it.last + 1;
                           __entry->flags = bo_va->flags;
                           ),
            TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x",
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index 906c8ae..39bc5c2 100644
@@ -326,17 +326,15 @@ struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
        }
        bo_va->vm = vm;
        bo_va->bo = bo;
-       bo_va->soffset = 0;
-       bo_va->eoffset = 0;
+       bo_va->it.start = 0;
+       bo_va->it.last = 0;
        bo_va->flags = 0;
        bo_va->addr = 0;
        bo_va->ref_count = 1;
        INIT_LIST_HEAD(&bo_va->bo_list);
-       INIT_LIST_HEAD(&bo_va->vm_list);
        INIT_LIST_HEAD(&bo_va->vm_status);
 
        mutex_lock(&vm->mutex);
-       list_add(&bo_va->vm_list, &vm->va);
        list_add_tail(&bo_va->bo_list, &bo->va);
        mutex_unlock(&vm->mutex);
 
@@ -420,11 +418,9 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
                          uint32_t flags)
 {
        uint64_t size = radeon_bo_size(bo_va->bo);
-       uint64_t eoffset, last_offset = 0;
        struct radeon_vm *vm = bo_va->vm;
-       struct radeon_bo_va *tmp;
-       struct list_head *head;
        unsigned last_pfn, pt_idx;
+       uint64_t eoffset;
        int r;
 
        if (soffset) {
@@ -446,51 +442,48 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
        }
 
        mutex_lock(&vm->mutex);
-       head = &vm->va;
-       last_offset = 0;
-       list_for_each_entry(tmp, &vm->va, vm_list) {
-               if (bo_va == tmp) {
-                       /* skip over currently modified bo */
-                       continue;
+       if (bo_va->it.start || bo_va->it.last) {
+               if (bo_va->addr) {
+                       /* add a clone of the bo_va to clear the old address */
+                       struct radeon_bo_va *tmp;
+                       tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
+                       tmp->it.start = bo_va->it.start;
+                       tmp->it.last = bo_va->it.last;
+                       tmp->vm = vm;
+                       tmp->addr = bo_va->addr;
+                       list_add(&tmp->vm_status, &vm->freed);
                }
 
-               if (soffset >= last_offset && eoffset <= tmp->soffset) {
-                       /* bo can be added before this one */
-                       break;
-               }
-               if (eoffset > tmp->soffset && soffset < tmp->eoffset) {
-                       /* bo and tmp overlap, invalid offset */
-                       dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
-                               bo_va->bo, (unsigned)bo_va->soffset, tmp->bo,
-                               (unsigned)tmp->soffset, (unsigned)tmp->eoffset);
-                       mutex_unlock(&vm->mutex);
-                       return -EINVAL;
-               }
-               last_offset = tmp->eoffset;
-               head = &tmp->vm_list;
+               interval_tree_remove(&bo_va->it, &vm->va);
+               bo_va->it.start = 0;
+               bo_va->it.last = 0;
        }
 
-       if (bo_va->soffset) {
-               /* add a clone of the bo_va to clear the old address */
-               tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
-               if (!tmp) {
+       soffset /= RADEON_GPU_PAGE_SIZE;
+       eoffset /= RADEON_GPU_PAGE_SIZE;
+       if (soffset || eoffset) {
+               struct interval_tree_node *it;
+               it = interval_tree_iter_first(&vm->va, soffset, eoffset - 1);
+               if (it) {
+                       struct radeon_bo_va *tmp;
+                       tmp = container_of(it, struct radeon_bo_va, it);
+                       /* bo and tmp overlap, invalid offset */
+                       dev_err(rdev->dev, "bo %p va 0x%010Lx conflict with "
+                               "(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo,
+                               soffset, tmp->bo, tmp->it.start, tmp->it.last);
                        mutex_unlock(&vm->mutex);
-                       return -ENOMEM;
+                       return -EINVAL;
                }
-               tmp->soffset = bo_va->soffset;
-               tmp->eoffset = bo_va->eoffset;
-               tmp->vm = vm;
-               list_add(&tmp->vm_status, &vm->freed);
+               bo_va->it.start = soffset;
+               bo_va->it.last = eoffset - 1;
+               interval_tree_insert(&bo_va->it, &vm->va);
        }
 
-       bo_va->soffset = soffset;
-       bo_va->eoffset = eoffset;
        bo_va->flags = flags;
        bo_va->addr = 0;
-       list_move(&bo_va->vm_list, head);
 
-       soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size;
-       eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size;
+       soffset >>= radeon_vm_block_size;
+       eoffset >>= radeon_vm_block_size;
 
        BUG_ON(eoffset >= radeon_vm_num_pdes(rdev));
 
@@ -778,9 +771,6 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
        unsigned count = 0;
        uint64_t addr;
 
-       start = start / RADEON_GPU_PAGE_SIZE;
-       end = end / RADEON_GPU_PAGE_SIZE;
-
        /* walk over the address space and update the page tables */
        for (addr = start; addr < end; ) {
                uint64_t pt_idx = addr >> radeon_vm_block_size;
@@ -847,7 +837,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
        uint64_t addr;
        int r;
 
-       if (!bo_va->soffset) {
+       if (!bo_va->it.start) {
                dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n",
                        bo_va->bo, vm);
                return -EINVAL;
@@ -881,7 +871,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 
        trace_radeon_vm_bo_update(bo_va);
 
-       nptes = (bo_va->eoffset - bo_va->soffset) / RADEON_GPU_PAGE_SIZE;
+       nptes = bo_va->it.last - bo_va->it.start + 1;
 
        /* padding, etc. */
        ndw = 64;
@@ -906,8 +896,9 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
                return r;
        ib.length_dw = 0;
 
-       radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
-                             addr, radeon_vm_page_flags(bo_va->flags));
+       radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
+                             bo_va->it.last + 1, addr,
+                             radeon_vm_page_flags(bo_va->flags));
 
        radeon_semaphore_sync_to(ib.semaphore, vm->fence);
        r = radeon_ib_schedule(rdev, &ib, NULL);
@@ -993,7 +984,7 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev,
        list_del(&bo_va->bo_list);
 
        mutex_lock(&vm->mutex);
-       list_del(&bo_va->vm_list);
+       interval_tree_remove(&bo_va->it, &vm->va);
        list_del(&bo_va->vm_status);
 
        if (bo_va->addr) {
@@ -1051,7 +1042,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
        vm->last_flush = NULL;
        vm->last_id_use = NULL;
        mutex_init(&vm->mutex);
-       INIT_LIST_HEAD(&vm->va);
+       vm->va = RB_ROOT;
        INIT_LIST_HEAD(&vm->invalidated);
        INIT_LIST_HEAD(&vm->freed);
 
@@ -1096,11 +1087,11 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
        struct radeon_bo_va *bo_va, *tmp;
        int i, r;
 
-       if (!list_empty(&vm->va)) {
+       if (!RB_EMPTY_ROOT(&vm->va)) {
                dev_err(rdev->dev, "still active bo inside vm\n");
        }
-       list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) {
-               list_del_init(&bo_va->vm_list);
+       rbtree_postorder_for_each_entry_safe(bo_va, tmp, &vm->va, it.rb) {
+               interval_tree_remove(&bo_va->it, &vm->va);
                r = radeon_bo_reserve(bo_va->bo, false);
                if (!r) {
                        list_del_init(&bo_va->bo_list);
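
Teardown uses rbtree_postorder_for_each_entry_safe(), which visits children before their parent, so every bo_va can be unlinked and released as it is visited without restarting from the root. The same pattern in isolation, reusing the illustrative struct mapping from the first sketch (error handling omitted):

    #include <linux/rbtree.h>
    #include <linux/slab.h>

    struct mapping *m, *next;

    rbtree_postorder_for_each_entry_safe(m, next, &root, it.rb) {
            interval_tree_remove(&m->it, &root);
            kfree(m);
    }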