From 6d2f2944e95e504a7d33385eeeb9bb7fcca72592 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Christian=20K=C3=B6nig?=
Date: Thu, 20 Feb 2014 13:42:17 +0100
Subject: [PATCH] drm/radeon: use normal BOs for the page tables v4
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

No need to make it more complicated than necessary: just allocate the
page tables as normal BOs and flush whenever the address changes.

v2: update comments and function name
v3: squash in bug fixes, page directory and tables patch
v4: rebased on Marek's changes

Signed-off-by: Christian König
---
 drivers/gpu/drm/radeon/radeon.h        |  23 +-
 drivers/gpu/drm/radeon/radeon_cs.c     |  44 +--
 drivers/gpu/drm/radeon/radeon_device.c |   1 -
 drivers/gpu/drm/radeon/radeon_kms.c    |   4 +-
 drivers/gpu/drm/radeon/radeon_vm.c     | 492 +++++++++++++++------------------
 5 files changed, 269 insertions(+), 295 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 644d922990ef..c31e3c2c1b3a 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -857,17 +857,22 @@ struct radeon_mec {
 #define R600_PTE_READABLE	(1 << 5)
 #define R600_PTE_WRITEABLE	(1 << 6)
 
+struct radeon_vm_pt {
+	struct radeon_bo		*bo;
+	uint64_t			addr;
+};
+
 struct radeon_vm {
-	struct list_head		list;
 	struct list_head		va;
 	unsigned			id;
 
 	/* contains the page directory */
-	struct radeon_sa_bo		*page_directory;
+	struct radeon_bo		*page_directory;
 	uint64_t			pd_gpu_addr;
+	unsigned			max_pde_used;
 
 	/* array of page tables, one for each page directory entry */
-	struct radeon_sa_bo		**page_tables;
+	struct radeon_vm_pt		*page_tables;
 
 	struct mutex			mutex;
 	/* last fence for cs using this vm */
@@ -880,9 +885,7 @@ struct radeon_vm {
 
 struct radeon_vm_manager {
 	struct mutex			lock;
-	struct list_head		lru_vm;
 	struct radeon_fence		*active[RADEON_NUM_VM];
-	struct radeon_sa_manager	sa_manager;
 	uint32_t			max_pfn;
 	/* number of VMIDs */
 	unsigned			nvm;
@@ -1011,6 +1014,7 @@ struct radeon_cs_parser {
 	unsigned		nrelocs;
 	struct radeon_cs_reloc	*relocs;
 	struct radeon_cs_reloc	**relocs_ptr;
+	struct radeon_bo_list	*vm_bos;
 	struct list_head	validated;
 	unsigned		dma_reloc_idx;
 	/* indices of various chunks */
@@ -2798,10 +2802,11 @@ extern void radeon_program_register_sequence(struct radeon_device *rdev,
  */
 int radeon_vm_manager_init(struct radeon_device *rdev);
 void radeon_vm_manager_fini(struct radeon_device *rdev);
-void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm);
+int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm);
 void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm);
-int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm);
-void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm);
+struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev,
+					 struct radeon_vm *vm,
+					 struct list_head *head);
 struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
 				       struct radeon_vm *vm, int ring);
 void radeon_vm_flush(struct radeon_device *rdev,
@@ -2811,6 +2816,8 @@ void radeon_vm_fence(struct radeon_device *rdev,
 		     struct radeon_vm *vm,
 		     struct radeon_fence *fence);
 uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr);
+int radeon_vm_update_page_directory(struct radeon_device *rdev,
+				    struct radeon_vm *vm);
 int radeon_vm_bo_update(struct radeon_device *rdev,
 			struct radeon_vm *vm,
 			struct radeon_bo *bo,
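The new struct radeon_vm_pt above pairs each page-directory entry with its
backing BO and the last address written into that PDE; the cached addr is
what later lets radeon_vm_update_page_directory() skip entries whose page
table has not moved. The page-directory index for a GPU virtual address
comes from the same shift the patch applies to soffset/eoffset in
radeon_vm_bo_set_addr(). A standalone sketch of that arithmetic (the two
constants below are assumed example values; the driver's real definitions
live in radeon.h):

#include <stdint.h>
#include <stdio.h>

#define RADEON_GPU_PAGE_SIZE 4096	/* assumed for the example */
#define RADEON_VM_BLOCK_SIZE 9		/* assumed: 512 PTEs per table */

static unsigned pde_index(uint64_t gpu_addr)
{
	/* page number first, then the directory-level shift */
	return (unsigned)((gpu_addr / RADEON_GPU_PAGE_SIZE) >>
			  RADEON_VM_BLOCK_SIZE);
}

int main(void)
{
	/* a 1 MiB mapping at 16 MiB stays inside a single page table */
	printf("first pde %u, last pde %u\n",
	       pde_index(16 << 20),
	       pde_index((16 << 20) + (1 << 20) - 1));
	return 0;
}

With these values each page table spans 2 MiB of address space, so the new
max_pde_used field is simply the highest such index any mapping has touched.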
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index f92df2e8ebdd..420c28dd6a1c 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -168,6 +168,10 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 
 	radeon_cs_buckets_get_list(&buckets, &p->validated);
 
+	if (p->cs_flags & RADEON_CS_USE_VM)
+		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
+					      &p->validated);
+
 	return radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
 }
 
@@ -401,6 +405,7 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
 	kfree(parser->track);
 	kfree(parser->relocs);
 	kfree(parser->relocs_ptr);
+	kfree(parser->vm_bos);
 	for (i = 0; i < parser->nchunks; i++)
 		drm_free_large(parser->chunks[i].kdata);
 	kfree(parser->chunks);
@@ -440,24 +445,32 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev,
 	return r;
 }
 
-static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser,
+static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
 				   struct radeon_vm *vm)
 {
-	struct radeon_device *rdev = parser->rdev;
-	struct radeon_bo_list *lobj;
-	struct radeon_bo *bo;
-	int r;
+	struct radeon_device *rdev = p->rdev;
+	int i, r;
 
-	r = radeon_vm_bo_update(rdev, vm, rdev->ring_tmp_bo.bo, &rdev->ring_tmp_bo.bo->tbo.mem);
-	if (r) {
+	r = radeon_vm_update_page_directory(rdev, vm);
+	if (r)
 		return r;
-	}
-	list_for_each_entry(lobj, &parser->validated, tv.head) {
-		bo = lobj->bo;
-		r = radeon_vm_bo_update(parser->rdev, vm, bo, &bo->tbo.mem);
-		if (r) {
+
+	r = radeon_vm_bo_update(rdev, vm, rdev->ring_tmp_bo.bo,
+				&rdev->ring_tmp_bo.bo->tbo.mem);
+	if (r)
+		return r;
+
+	for (i = 0; i < p->nrelocs; i++) {
+		struct radeon_bo *bo;
+
+		/* ignore duplicates */
+		if (p->relocs_ptr[i] != &p->relocs[i])
+			continue;
+
+		bo = p->relocs[i].robj;
+		r = radeon_vm_bo_update(rdev, vm, bo, &bo->tbo.mem);
+		if (r)
 			return r;
-		}
 	}
 	return 0;
 }
 
@@ -491,10 +504,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 	mutex_lock(&rdev->vm_manager.lock);
 	mutex_lock(&vm->mutex);
-	r = radeon_vm_alloc_pt(rdev, vm);
-	if (r) {
-		goto out;
-	}
 	r = radeon_bo_vm_update_pte(parser, vm);
 	if (r) {
 		goto out;
@@ -512,7 +521,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 	}
 
 out:
-	radeon_vm_add_to_lru(rdev, vm);
 	mutex_unlock(&vm->mutex);
 	mutex_unlock(&rdev->vm_manager.lock);
 	return r;
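Because the page directory and page tables are ordinary BOs now, they must be
validated alongside the relocs on every submission; TTM is then free to move
them between submissions, and radeon_vm_update_page_directory() repairs the
PDEs afterwards. A condensed sketch of the ordering the hunks above
establish, using the patch's own entry points with all error handling
omitted (sketch only, not part of the patch):

/* Sketch only: the per-submission VM ordering after this patch. */
static void cs_vm_order_sketch(struct radeon_cs_parser *p, struct radeon_vm *vm)
{
	/* 1. PD and PT BOs join the reloc BOs on the validation list */
	p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm, &p->validated);

	/* 2. validation may move any of them (VRAM pressure, eviction) */
	radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);

	/* 3. write the possibly new page-table addresses into the PD */
	radeon_vm_update_page_directory(p->rdev, vm);

	/* 4. then rewrite the PTEs of every validated BO, as in
	 *    radeon_bo_vm_update_pte() above */
}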
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index fa7841b9d7b6..e58dbabd7d7d 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1198,7 +1198,6 @@ int radeon_device_init(struct radeon_device *rdev,
 	 * Max GPUVM size for cayman and SI is 40 bits.
 	 */
 	rdev->vm_manager.max_pfn = 1 << 20;
-	INIT_LIST_HEAD(&rdev->vm_manager.lru_vm);
 
 	/* Set asic functions */
 	r = radeon_asic_init(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 7a810d0796a0..37b1deacb5b1 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -559,7 +559,9 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 			return -ENOMEM;
 		}
 
-		radeon_vm_init(rdev, &fpriv->vm);
+		r = radeon_vm_init(rdev, &fpriv->vm);
+		if (r)
+			return r;
 
 		r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
 		if (r)
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index 516017689793..44b6918a87ba 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -84,85 +84,19 @@ static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
  */
 int radeon_vm_manager_init(struct radeon_device *rdev)
 {
-	struct radeon_vm *vm;
-	struct radeon_bo_va *bo_va;
 	int r;
-	unsigned size;
 
 	if (!rdev->vm_manager.enabled) {
-		/* allocate enough for 2 full VM pts */
-		size = radeon_vm_directory_size(rdev);
-		size += rdev->vm_manager.max_pfn * 8;
-		size *= 2;
-		r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
-					      RADEON_GPU_PAGE_ALIGN(size),
-					      RADEON_VM_PTB_ALIGN_SIZE,
-					      RADEON_GEM_DOMAIN_VRAM);
-		if (r) {
-			dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
-				(rdev->vm_manager.max_pfn * 8) >> 10);
-			return r;
-		}
-
 		r = radeon_asic_vm_init(rdev);
 		if (r)
 			return r;
 
 		rdev->vm_manager.enabled = true;
-
-		r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager);
-		if (r)
-			return r;
-	}
-
-	/* restore page table */
-	list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) {
-		if (vm->page_directory == NULL)
-			continue;
-
-		list_for_each_entry(bo_va, &vm->va, vm_list) {
-			bo_va->valid = false;
-		}
 	}
 	return 0;
 }
 
 /**
- * radeon_vm_free_pt - free the page table for a specific vm
- *
- * @rdev: radeon_device pointer
- * @vm: vm to unbind
- *
- * Free the page table of a specific vm (cayman+).
- *
- * Global and local mutex must be lock!
- */
-static void radeon_vm_free_pt(struct radeon_device *rdev,
-			      struct radeon_vm *vm)
-{
-	struct radeon_bo_va *bo_va;
-	int i;
-
-	if (!vm->page_directory)
-		return;
-
-	list_del_init(&vm->list);
-	radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
-
-	list_for_each_entry(bo_va, &vm->va, vm_list) {
-		bo_va->valid = false;
-	}
-
-	if (vm->page_tables == NULL)
-		return;
-
-	for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
-		radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence);
-
-	kfree(vm->page_tables);
-}
-
-/**
  * radeon_vm_manager_fini - tear down the vm manager
  *
  * @rdev: radeon_device pointer
@@ -171,155 +105,59 @@ static void radeon_vm_free_pt(struct radeon_device *rdev,
  */
 void radeon_vm_manager_fini(struct radeon_device *rdev)
 {
-	struct radeon_vm *vm, *tmp;
 	int i;
 
 	if (!rdev->vm_manager.enabled)
 		return;
 
 	mutex_lock(&rdev->vm_manager.lock);
-	/* free all allocated page tables */
-	list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) {
-		mutex_lock(&vm->mutex);
-		radeon_vm_free_pt(rdev, vm);
-		mutex_unlock(&vm->mutex);
-	}
-	for (i = 0; i < RADEON_NUM_VM; ++i) {
+	for (i = 0; i < RADEON_NUM_VM; ++i)
 		radeon_fence_unref(&rdev->vm_manager.active[i]);
-	}
 	radeon_asic_vm_fini(rdev);
-	mutex_unlock(&rdev->vm_manager.lock);
-
-	radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager);
-	radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager);
 	rdev->vm_manager.enabled = false;
+	mutex_unlock(&rdev->vm_manager.lock);
 }
 
 /**
- * radeon_vm_evict - evict page table to make room for new one
- *
- * @rdev: radeon_device pointer
- * @vm: VM we want to allocate something for
+ * radeon_vm_get_bos - add the vm BOs to a validation list
  *
- * Evict a VM from the lru, making sure that it isn't @vm. (cayman+).
- * Returns 0 for success, -ENOMEM for failure.
+ * @vm: vm providing the BOs
+ * @head: head of validation list
  *
- * Global and local mutex must be locked!
+ * Add the page directory to the list of BOs to
+ * validate for command submission (cayman+).
  */
-static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm)
+struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev,
+					 struct radeon_vm *vm,
+					 struct list_head *head)
 {
-	struct radeon_vm *vm_evict;
+	struct radeon_bo_list *list;
+	unsigned i, idx, size;
 
-	if (list_empty(&rdev->vm_manager.lru_vm))
-		return -ENOMEM;
-
-	vm_evict = list_first_entry(&rdev->vm_manager.lru_vm,
-				    struct radeon_vm, list);
-	if (vm_evict == vm)
-		return -ENOMEM;
-
-	mutex_lock(&vm_evict->mutex);
-	radeon_vm_free_pt(rdev, vm_evict);
-	mutex_unlock(&vm_evict->mutex);
-	return 0;
-}
-
-/**
- * radeon_vm_alloc_pt - allocates a page table for a VM
- *
- * @rdev: radeon_device pointer
- * @vm: vm to bind
- *
- * Allocate a page table for the requested vm (cayman+).
- * Returns 0 for success, error for failure.
- *
- * Global and local mutex must be locked!
- */
-int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm)
-{
-	unsigned pd_size, pd_entries, pts_size;
-	struct radeon_ib ib;
-	int r;
-
-	if (vm == NULL) {
-		return -EINVAL;
-	}
-
-	if (vm->page_directory != NULL) {
-		return 0;
-	}
-
-	pd_size = radeon_vm_directory_size(rdev);
-	pd_entries = radeon_vm_num_pdes(rdev);
-
-retry:
-	r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
-			     &vm->page_directory, pd_size,
-			     RADEON_VM_PTB_ALIGN_SIZE, false);
-	if (r == -ENOMEM) {
-		r = radeon_vm_evict(rdev, vm);
-		if (r)
-			return r;
-		goto retry;
-
-	} else if (r) {
-		return r;
-	}
-
-	vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory);
-
-	/* Initially clear the page directory */
-	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib,
-			  NULL, pd_entries * 2 + 64);
-	if (r) {
-		radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
-		return r;
-	}
-
-	ib.length_dw = 0;
+	size = (radeon_vm_num_pdes(rdev) + 1) * sizeof(struct radeon_bo_list);
+	list = kmalloc(size, GFP_KERNEL);
+	if (!list)
+		return NULL;
 
-	radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr,
-				0, pd_entries, 0, 0);
+	/* add the vm page table to the list */
+	list[0].bo = vm->page_directory;
+	list[0].domain = RADEON_GEM_DOMAIN_VRAM;
+	list[0].alt_domain = RADEON_GEM_DOMAIN_VRAM;
+	list[0].tv.bo = &vm->page_directory->tbo;
+	list_add(&list[0].tv.head, head);
 
-	radeon_semaphore_sync_to(ib.semaphore, vm->fence);
-	r = radeon_ib_schedule(rdev, &ib, NULL);
-	if (r) {
-		radeon_ib_free(rdev, &ib);
-		radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
-		return r;
-	}
-	radeon_fence_unref(&vm->fence);
-	vm->fence = radeon_fence_ref(ib.fence);
-	radeon_ib_free(rdev, &ib);
-	radeon_fence_unref(&vm->last_flush);
-
-	/* allocate page table array */
-	pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *);
-	vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
+	for (i = 0, idx = 1; i <= vm->max_pde_used; i++) {
+		if (!vm->page_tables[i].bo)
+			continue;
 
-	if (vm->page_tables == NULL) {
-		DRM_ERROR("Cannot allocate memory for page table array\n");
-		radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
-		return -ENOMEM;
+		list[idx].bo = vm->page_tables[i].bo;
+		list[idx].domain = RADEON_GEM_DOMAIN_VRAM;
+		list[idx].alt_domain = RADEON_GEM_DOMAIN_VRAM;
+		list[idx].tv.bo = &list[idx].bo->tbo;
+		list_add(&list[idx++].tv.head, head);
 	}
 
-	return 0;
-}
-
-/**
- * radeon_vm_add_to_lru - add VMs page table to LRU list
- *
- * @rdev: radeon_device pointer
- * @vm: vm to add to LRU
- *
- * Add the allocated page table to the LRU list (cayman+).
- *
- * Global mutex must be locked!
- */
-void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm)
-{
-	list_del_init(&vm->list);
-	list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
+	return list;
 }
 
 /**
@@ -393,10 +231,14 @@ void radeon_vm_flush(struct radeon_device *rdev,
 		     struct radeon_vm *vm,
 		     int ring)
 {
+	uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
+
 	/* if we can't remember our last VM flush then flush now! */
 	/* XXX figure out why we have to flush all the time */
-	if (!vm->last_flush || true)
+	if (!vm->last_flush || true || pd_addr != vm->pd_gpu_addr) {
+		vm->pd_gpu_addr = pd_addr;
 		radeon_ring_vm_flush(rdev, ring, vm);
+	}
 }
 
 /**
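The rewritten radeon_vm_flush() is where "flush whenever the address
changes" happens: TTM may have moved the page-directory BO during
validation, so the cached pd_gpu_addr can be stale, and comparing it against
the BO's current offset decides whether the VM registers need reprogramming.
Stripped of the "always flush" workaround kept in the hunk above, the check
reduces to this sketch (not the literal driver code):

/* Sketch of the flush-on-move check, with the XXX workaround removed. */
static void vm_flush_sketch(struct radeon_device *rdev,
			    struct radeon_vm *vm, int ring)
{
	uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);

	if (!vm->last_flush || pd_addr != vm->pd_gpu_addr) {
		vm->pd_gpu_addr = pd_addr;	/* remember where TTM put it */
		radeon_ring_vm_flush(rdev, ring, vm);
	}
}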
@@ -496,6 +338,63 @@ struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
 }
 
 /**
+ * radeon_vm_clear_bo - initially clear the page dir/table
+ *
+ * @rdev: radeon_device pointer
+ * @bo: bo to clear
+ */
+static int radeon_vm_clear_bo(struct radeon_device *rdev,
+			      struct radeon_bo *bo)
+{
+	struct ttm_validate_buffer tv;
+	struct ww_acquire_ctx ticket;
+	struct list_head head;
+	struct radeon_ib ib;
+	unsigned entries;
+	uint64_t addr;
+	int r;
+
+	memset(&tv, 0, sizeof(tv));
+	tv.bo = &bo->tbo;
+
+	INIT_LIST_HEAD(&head);
+	list_add(&tv.head, &head);
+
+	r = ttm_eu_reserve_buffers(&ticket, &head);
+	if (r)
+		return r;
+
+	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
+	if (r)
+		goto error;
+
+	addr = radeon_bo_gpu_offset(bo);
+	entries = radeon_bo_size(bo) / 8;
+
+	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib,
+			  NULL, entries * 2 + 64);
+	if (r)
+		goto error;
+
+	ib.length_dw = 0;
+
+	radeon_asic_vm_set_page(rdev, &ib, addr, 0, entries, 0, 0);
+
+	r = radeon_ib_schedule(rdev, &ib, NULL);
+	if (r)
+		goto error;
+
+	ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence);
+	radeon_ib_free(rdev, &ib);
+
+	return 0;
+
+error:
+	ttm_eu_backoff_reservation(&ticket, &head);
+	return r;
+}
+
+/**
  * radeon_vm_bo_set_addr - set bos virtual address inside a vm
  *
  * @rdev: radeon_device pointer
@@ -519,7 +418,8 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
 	struct radeon_vm *vm = bo_va->vm;
 	struct radeon_bo_va *tmp;
 	struct list_head *head;
-	unsigned last_pfn;
+	unsigned last_pfn, pt_idx;
+	int r;
 
 	if (soffset) {
 		/* make sure object fit at this offset */
@@ -570,8 +470,53 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
 	bo_va->valid = false;
 	list_move(&bo_va->vm_list, head);
 
+	soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
+	eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
+
+	if (eoffset > vm->max_pde_used)
+		vm->max_pde_used = eoffset;
+
+	radeon_bo_unreserve(bo_va->bo);
+
+	/* walk over the address space and allocate the page tables */
+	for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) {
+		struct radeon_bo *pt;
+
+		if (vm->page_tables[pt_idx].bo)
+			continue;
+
+		/* drop mutex to allocate and clear page table */
+		mutex_unlock(&vm->mutex);
+
+		r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8,
+				     RADEON_GPU_PAGE_SIZE, false,
+				     RADEON_GEM_DOMAIN_VRAM, NULL, &pt);
+		if (r)
+			return r;
+
+		r = radeon_vm_clear_bo(rdev, pt);
+		if (r) {
+			radeon_bo_unref(&pt);
+			radeon_bo_reserve(bo_va->bo, false);
+			return r;
+		}
+
+		/* acquire mutex again */
+		mutex_lock(&vm->mutex);
+		if (vm->page_tables[pt_idx].bo) {
+			/* someone else allocated the pt in the meantime */
+			mutex_unlock(&vm->mutex);
+			radeon_bo_unref(&pt);
+			mutex_lock(&vm->mutex);
+			continue;
+		}
+
+		vm->page_tables[pt_idx].addr = 0;
+		vm->page_tables[pt_idx].bo = pt;
+	}
+
 	mutex_unlock(&vm->mutex);
-	return 0;
+	return radeon_bo_reserve(bo_va->bo, false);
 }
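radeon_vm_bo_set_addr() above allocates page tables lazily and has to drop
vm->mutex around radeon_bo_create() and radeon_vm_clear_bo(), since both can
sleep; it therefore rechecks the slot after retaking the lock. That is the
standard optimistic-allocation pattern, in outline (sketch only; the patch
itself drops the mutex once more before unreferencing the losing BO):

/* Optimistic allocation under a dropped lock, as in the hunk above. */
mutex_unlock(&vm->mutex);		/* creation and clearing may sleep */
r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8, RADEON_GPU_PAGE_SIZE,
		     false, RADEON_GEM_DOMAIN_VRAM, NULL, &pt);
/* ... radeon_vm_clear_bo(rdev, pt) ... */
mutex_lock(&vm->mutex);
if (vm->page_tables[pt_idx].bo) {
	/* lost the race: another thread filled the slot meanwhile */
	radeon_bo_unref(&pt);
} else {
	vm->page_tables[pt_idx].addr = 0;	/* force a PDE rewrite */
	vm->page_tables[pt_idx].bo = pt;	/* won the race: publish */
}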
@@ -631,58 +576,53 @@ static uint32_t radeon_vm_page_flags(uint32_t flags)
  *
  * Global and local mutex must be locked!
  */
-static int radeon_vm_update_pdes(struct radeon_device *rdev,
-				 struct radeon_vm *vm,
-				 struct radeon_ib *ib,
-				 uint64_t start, uint64_t end)
+int radeon_vm_update_page_directory(struct radeon_device *rdev,
+				    struct radeon_vm *vm)
 {
 	static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;
 
+	uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
 	uint64_t last_pde = ~0, last_pt = ~0;
-	unsigned count = 0;
-	uint64_t pt_idx;
+	unsigned count = 0, pt_idx, ndw;
+	struct radeon_ib ib;
 	int r;
 
-	start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
-	end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
+	/* padding, etc. */
+	ndw = 64;
+
+	/* assume the worst case */
+	ndw += vm->max_pde_used * 12;
+
+	/* update too big for an IB */
+	if (ndw > 0xfffff)
+		return -ENOMEM;
+
+	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
+	if (r)
+		return r;
+	ib.length_dw = 0;
 
 	/* walk over the address space and update the page directory */
-	for (pt_idx = start; pt_idx <= end; ++pt_idx) {
+	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
+		struct radeon_bo *bo = vm->page_tables[pt_idx].bo;
 		uint64_t pde, pt;
 
-		if (vm->page_tables[pt_idx])
+		if (bo == NULL)
 			continue;
 
-retry:
-		r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
-				     &vm->page_tables[pt_idx],
-				     RADEON_VM_PTE_COUNT * 8,
-				     RADEON_GPU_PAGE_SIZE, false);
-
-		if (r == -ENOMEM) {
-			r = radeon_vm_evict(rdev, vm);
-			if (r)
-				return r;
-			goto retry;
-		} else if (r) {
-			return r;
-		}
-
-		pde = vm->pd_gpu_addr + pt_idx * 8;
-
-		pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
+		pt = radeon_bo_gpu_offset(bo);
+		if (vm->page_tables[pt_idx].addr == pt)
+			continue;
+		vm->page_tables[pt_idx].addr = pt;
 
+		pde = pd_addr + pt_idx * 8;
 		if (((last_pde + 8 * count) != pde) ||
 		    ((last_pt + incr * count) != pt)) {
 
 			if (count) {
-				radeon_asic_vm_set_page(rdev, ib, last_pde,
+				radeon_asic_vm_set_page(rdev, &ib, last_pde,
 							last_pt, count, incr,
 							R600_PTE_VALID);
-
-				count *= RADEON_VM_PTE_COUNT;
-				radeon_asic_vm_set_page(rdev, ib, last_pt, 0,
-							count, 0, 0);
 			}
 
 			count = 1;
@@ -693,14 +633,22 @@ retry:
 		}
 	}
 
-	if (count) {
-		radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count,
+	if (count)
+		radeon_asic_vm_set_page(rdev, &ib, last_pde, last_pt, count,
 					incr, R600_PTE_VALID);
 
-		count *= RADEON_VM_PTE_COUNT;
-		radeon_asic_vm_set_page(rdev, ib, last_pt, 0,
-					count, 0, 0);
+	if (ib.length_dw != 0) {
+		radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use);
+		r = radeon_ib_schedule(rdev, &ib, NULL);
+		if (r) {
+			radeon_ib_free(rdev, &ib);
+			return r;
+		}
+		radeon_fence_unref(&vm->fence);
+		vm->fence = radeon_fence_ref(ib.fence);
+		radeon_fence_unref(&vm->last_flush);
 	}
+	radeon_ib_free(rdev, &ib);
 
 	return 0;
 }
@@ -745,7 +693,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
 		else
 			nptes = RADEON_VM_PTE_COUNT - (addr & mask);
 
-		pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
+		pte = radeon_bo_gpu_offset(vm->page_tables[pt_idx].bo);
 		pte += (addr & mask) * 8;
 
 		if ((last_pte + 8 * count) != pte) {
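radeon_vm_update_page_directory() above batches its GPU writes: a run of
consecutive PDEs whose page tables also sit consecutively in memory becomes
a single radeon_asic_vm_set_page() call with a count and stride. The program
below is a standalone, userspace rendition of that coalescing loop (the
addresses and the 4 KiB stride are made-up example values; INCR stands in
for RADEON_VM_PTE_COUNT * 8):

#include <stdint.h>
#include <stdio.h>

#define INCR 4096	/* assumed page-table stride for the example */

static void set_page(uint64_t pde, uint64_t pt, unsigned count)
{
	printf("write %u PDEs at 0x%llx -> PTs from 0x%llx\n",
	       count, (unsigned long long)pde, (unsigned long long)pt);
}

int main(void)
{
	/* PT addresses for PDEs 0..4; note the gap in the run at index 3 */
	uint64_t pts[] = { 0x10000, 0x11000, 0x12000, 0x40000, 0x41000 };
	uint64_t pd_addr = 0x1000, last_pde = ~0ULL, last_pt = ~0ULL;
	unsigned count = 0, i;

	for (i = 0; i < 5; i++) {
		uint64_t pde = pd_addr + i * 8, pt = pts[i];

		if (last_pde + 8 * count != pde ||
		    last_pt + INCR * count != pt) {
			if (count)
				set_page(last_pde, last_pt, count);
			count = 1;
			last_pde = pde;
			last_pt = pt;
		} else {
			++count;
		}
	}
	if (count)
		set_page(last_pde, last_pt, count);
	return 0;
}

Run as written, this flushes two batches: three PDEs starting at 0x1000 and
two more at 0x1018, exactly the grouping the driver loop would produce.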
@@ -795,14 +743,10 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 {
 	struct radeon_ib ib;
 	struct radeon_bo_va *bo_va;
-	unsigned nptes, npdes, ndw;
+	unsigned nptes, ndw;
 	uint64_t addr;
 	int r;
 
-	/* nothing to do if vm isn't bound */
-	if (vm->page_directory == NULL)
-		return 0;
-
 	bo_va = radeon_vm_bo_find(vm, bo);
 	if (bo_va == NULL) {
 		dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
@@ -840,9 +784,6 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 
 	nptes = radeon_bo_ngpu_pages(bo);
 
-	/* assume two extra pdes in case the mapping overlaps the borders */
-	npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2;
-
 	/* padding, etc. */
 	ndw = 64;
 
@@ -857,15 +798,6 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 	/* reserve space for pte addresses */
 	ndw += nptes * 2;
 
-	/* reserve space for one header for every 2k dwords */
-	ndw += (npdes >> 11) * 4;
-
-	/* reserve space for pde addresses */
-	ndw += npdes * 2;
-
-	/* reserve space for clearing new page tables */
-	ndw += npdes * 2 * RADEON_VM_PTE_COUNT;
-
 	/* update too big for an IB */
 	if (ndw > 0xfffff)
 		return -ENOMEM;
@@ -875,12 +807,6 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 		return r;
 	ib.length_dw = 0;
 
-	r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset);
-	if (r) {
-		radeon_ib_free(rdev, &ib);
-		return r;
-	}
-
 	radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
 			      addr, radeon_vm_page_flags(bo_va->flags));
 
@@ -957,15 +883,43 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev,
  *
  * Init @vm fields (cayman+).
  */
-void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
+int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 {
+	unsigned pd_size, pd_entries, pts_size;
+	int r;
+
 	vm->id = 0;
 	vm->fence = NULL;
 	vm->last_flush = NULL;
 	vm->last_id_use = NULL;
 	mutex_init(&vm->mutex);
-	INIT_LIST_HEAD(&vm->list);
 	INIT_LIST_HEAD(&vm->va);
+
+	pd_size = radeon_vm_directory_size(rdev);
+	pd_entries = radeon_vm_num_pdes(rdev);
+
+	/* allocate page table array */
+	pts_size = pd_entries * sizeof(struct radeon_vm_pt);
+	vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
+	if (vm->page_tables == NULL) {
+		DRM_ERROR("Cannot allocate memory for page table array\n");
+		return -ENOMEM;
+	}
+
+	r = radeon_bo_create(rdev, pd_size, RADEON_VM_PTB_ALIGN_SIZE, false,
+			     RADEON_GEM_DOMAIN_VRAM, NULL,
+			     &vm->page_directory);
+	if (r)
+		return r;
+
+	r = radeon_vm_clear_bo(rdev, vm->page_directory);
+	if (r) {
+		radeon_bo_unref(&vm->page_directory);
+		vm->page_directory = NULL;
+		return r;
+	}
+
+	return 0;
 }
 
 /**
@@ -980,12 +934,7 @@ void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
 {
 	struct radeon_bo_va *bo_va, *tmp;
-	int r;
-
-	mutex_lock(&rdev->vm_manager.lock);
-	mutex_lock(&vm->mutex);
-	radeon_vm_free_pt(rdev, vm);
-	mutex_unlock(&rdev->vm_manager.lock);
+	int i, r;
 
 	if (!list_empty(&vm->va)) {
 		dev_err(rdev->dev, "still active bo inside vm\n");
@@ -999,8 +948,17 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
 			kfree(bo_va);
 		}
 	}
+
+	for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
+		radeon_bo_unref(&vm->page_tables[i].bo);
+	kfree(vm->page_tables);
+
+	radeon_bo_unref(&vm->page_directory);
+
 	radeon_fence_unref(&vm->fence);
 	radeon_fence_unref(&vm->last_flush);
 	radeon_fence_unref(&vm->last_id_use);
-	mutex_unlock(&vm->mutex);
+
+	mutex_destroy(&vm->mutex);
 }
-- 
2.11.0
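For reference, the per-VM lifecycle that results from this patch, expressed
as a sketch with the patch's own entry points (arguments abbreviated, all
error handling dropped; not literal driver code):

/* Sketch: lifetime of the VM page-table BOs after this patch. */
radeon_vm_init(rdev, vm);		/* create + clear the PD BO        */
radeon_vm_bo_set_addr(rdev, bo_va, soffset, flags);
					/* lazily create + clear PT BOs    */

/* per command submission: */
radeon_vm_get_bos(rdev, vm, &validated);	/* PD/PTs join validation  */
radeon_vm_update_page_directory(rdev, vm);	/* rewrite moved PDEs      */
radeon_vm_bo_update(rdev, vm, bo, mem);		/* rewrite the BO's PTEs   */
radeon_vm_flush(rdev, vm, ring);		/* flush if the PD moved   */

radeon_vm_fini(rdev, vm);		/* unref every PT BO and the PD    */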