X-Git-Url: http://git.osdn.net/view?a=blobdiff_plain;f=intel%2Fintel_bufmgr_gem.c;h=71f140f54dd0e611626124484c2b1214234492ba;hb=5c490bd6854a5b265aaa62ef62926c3ac97092c7;hp=ba38e503c4f5eba6b02b21c2d3476a7c15e3c650;hpb=6e642db7f4a5628ed63ca3c479f06bd6f2ca3893;p=android-x86%2Fexternal-libdrm.git

diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index ba38e503..71f140f5 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -1,7 +1,7 @@
 /**************************************************************************
  *
  * Copyright © 2007 Red Hat Inc.
- * Copyright © 2007 Intel Corporation
+ * Copyright © 2007-2012 Intel Corporation
  * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
  * All Rights Reserved.
  *
@@ -48,12 +48,15 @@
 #include <assert.h>
 #include <pthread.h>
 #include <sys/ioctl.h>
-#include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <stdbool.h>
 
 #include "errno.h"
+#ifndef ETIME
+#define ETIME ETIMEDOUT
+#endif
+#include "libdrm_macros.h"
 #include "libdrm_lists.h"
 #include "intel_bufmgr.h"
 #include "intel_bufmgr_priv.h"
@@ -61,6 +64,7 @@
 #include "string.h"
 
 #include "i915_drm.h"
+#include "uthash.h"
 
 #ifdef HAVE_VALGRIND
 #include <valgrind.h>
@@ -70,7 +74,7 @@
 #define VG(x) x
 #else
 #define VG(x)
 #endif
 
-#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s)))
+#define memclear(s) memset(&s, 0, sizeof(s))
 
 #define DBG(...) do {					\
 	if (bufmgr_gem->bufmgr.debug)			\
@@ -78,6 +82,23 @@
 } while (0)
 
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#define MAX2(A, B) ((A) > (B) ? (A) : (B))
+
+/**
+ * upper_32_bits - return bits 32-63 of a number
+ * @n: the number we're accessing
+ *
+ * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress
+ * the "right shift count >= width of type" warning when that quantity is
+ * 32-bits.
+ */
+#define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))
+
+/**
+ * lower_32_bits - return bits 0-31 of a number
+ * @n: the number we're accessing
+ */
+#define lower_32_bits(n) ((__u32)(n))
 
 typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
 
@@ -89,6 +110,8 @@ struct drm_intel_gem_bo_bucket {
 typedef struct _drm_intel_bufmgr_gem {
 	drm_intel_bufmgr bufmgr;
 
+	atomic_t refcount;
+
 	int fd;
 
 	int max_relocs;
@@ -106,7 +129,11 @@ typedef struct _drm_intel_bufmgr_gem {
 	int num_buckets;
 	time_t time;
 
-	drmMMListHead named;
+	drmMMListHead managers;
+
+	drm_intel_bo_gem *name_table;
+	drm_intel_bo_gem *handle_table;
+
 	drmMMListHead vma_cache;
 	int vma_count, vma_open, vma_max;
 
@@ -118,9 +145,18 @@ typedef struct _drm_intel_bufmgr_gem {
 	unsigned int has_blt : 1;
 	unsigned int has_relaxed_fencing : 1;
 	unsigned int has_llc : 1;
+	unsigned int has_wait_timeout : 1;
 	unsigned int bo_reuse : 1;
 	unsigned int no_exec : 1;
+	unsigned int has_vebox : 1;
+	unsigned int has_exec_async : 1;
 	bool fenced_relocs;
+
+	struct {
+		void *ptr;
+		uint32_t handle;
+	} userptr_active;
+
 } drm_intel_bufmgr_gem;
 
 #define DRM_INTEL_RELOC_FENCE (1<<0)
@@ -139,9 +175,13 @@ struct _drm_intel_bo_gem {
 
 	/**
 	 * Kenel-assigned global name for this object
+	 *
+	 * List contains both flink named and prime fd'd objects
 	 */
 	unsigned int global_name;
-	drmMMListHead name_list;
+
+	UT_hash_handle handle_hh;
+	UT_hash_handle name_hh;
 
 	/**
 	 * Index of the buffer within the validation list while preparing a
@@ -156,6 +196,8 @@ struct _drm_intel_bo_gem {
 	uint32_t swizzle_mode;
 	unsigned long stride;
 
+	unsigned long kflags;
+
 	time_t free_time;
 
 	/** Array passed to the DRM containing relocation information.
*/ @@ -166,10 +208,24 @@ struct _drm_intel_bo_gem { drm_intel_reloc_target *reloc_target_info; /** Number of entries in relocs */ int reloc_count; + /** Array of BOs that are referenced by this buffer and will be softpinned */ + drm_intel_bo **softpin_target; + /** Number softpinned BOs that are referenced by this buffer */ + int softpin_target_count; + /** Maximum amount of softpinned BOs that are referenced by this buffer */ + int softpin_target_size; + /** Mapped address for the buffer, saved across map/unmap cycles */ void *mem_virtual; /** GTT virtual address for the buffer, saved across map/unmap cycles */ void *gtt_virtual; + /** WC CPU address for the buffer, saved across map/unmap cycles */ + void *wc_virtual; + /** + * Virtual address of the buffer allocated by user, used for userptr + * objects only. + */ + void *user_virtual; int map_count; drmMMListHead vma_list; @@ -200,6 +256,20 @@ struct _drm_intel_bo_gem { bool reusable; /** + * Boolean of whether the GPU is definitely not accessing the buffer. + * + * This is only valid when reusable, since non-reusable + * buffers are those that have been shared with other + * processes, so we don't know their state. + */ + bool idle; + + /** + * Boolean of whether this buffer was allocated with userptr + */ + bool is_userptr; + + /** * Size in bytes of this buffer and its relocation descendents. * * Used to avoid costly tree walking in @@ -213,7 +283,7 @@ struct _drm_intel_bo_gem { */ int reloc_tree_fences; - /** Flags that we may need to do the SW_FINSIH ioctl on unmap. */ + /** Flags that we may need to do the SW_FINISH ioctl on unmap. */ bool mapped_cpu_write; }; @@ -239,6 +309,11 @@ static void drm_intel_gem_bo_unreference(drm_intel_bo *bo); static void drm_intel_gem_bo_free(drm_intel_bo *bo); +static inline drm_intel_bo_gem *to_bo_gem(drm_intel_bo *bo) +{ + return (drm_intel_bo_gem *)bo; +} + static unsigned long drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size, uint32_t *tiling_mode) @@ -347,8 +422,9 @@ drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - if (bo_gem->relocs == NULL) { - DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle, + if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) { + DBG("%2d: %d %s(%s)\n", i, bo_gem->gem_handle, + bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "", bo_gem->name); continue; } @@ -358,16 +434,36 @@ drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) drm_intel_bo_gem *target_gem = (drm_intel_bo_gem *) target_bo; - DBG("%2d: %d (%s)@0x%08llx -> " - "%d (%s)@0x%08lx + 0x%08x\n", + DBG("%2d: %d %s(%s)@0x%08x %08x -> " + "%d (%s)@0x%08x %08x + 0x%08x\n", i, - bo_gem->gem_handle, bo_gem->name, - (unsigned long long)bo_gem->relocs[j].offset, + bo_gem->gem_handle, + bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "", + bo_gem->name, + upper_32_bits(bo_gem->relocs[j].offset), + lower_32_bits(bo_gem->relocs[j].offset), target_gem->gem_handle, target_gem->name, - target_bo->offset, + upper_32_bits(target_bo->offset64), + lower_32_bits(target_bo->offset64), bo_gem->relocs[j].delta); } + + for (j = 0; j < bo_gem->softpin_target_count; j++) { + drm_intel_bo *target_bo = bo_gem->softpin_target[j]; + drm_intel_bo_gem *target_gem = + (drm_intel_bo_gem *) target_bo; + DBG("%2d: %d %s(%s) -> " + "%d *(%s)@0x%08x %08x\n", + i, + bo_gem->gem_handle, + bo_gem->kflags & EXEC_OBJECT_PINNED ? 
"*" : "", + bo_gem->name, + target_gem->gem_handle, + target_gem->name, + upper_32_bits(target_bo->offset64), + lower_32_bits(target_bo->offset64)); + } } } @@ -419,7 +515,7 @@ drm_intel_add_validate_buffer(drm_intel_bo *bo) bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle; bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count; bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs; - bufmgr_gem->exec_objects[index].alignment = 0; + bufmgr_gem->exec_objects[index].alignment = bo->align; bufmgr_gem->exec_objects[index].offset = 0; bufmgr_gem->exec_bos[index] = bo; bufmgr_gem->exec_count++; @@ -431,11 +527,14 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; int index; + unsigned long flags; + + flags = 0; + if (need_fence) + flags |= EXEC_OBJECT_NEEDS_FENCE; if (bo_gem->validate_index != -1) { - if (need_fence) - bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= - EXEC_OBJECT_NEEDS_FENCE; + bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags; return; } @@ -461,16 +560,12 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle; bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count; bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs; - bufmgr_gem->exec2_objects[index].alignment = 0; - bufmgr_gem->exec2_objects[index].offset = 0; - bufmgr_gem->exec_bos[index] = bo; - bufmgr_gem->exec2_objects[index].flags = 0; + bufmgr_gem->exec2_objects[index].alignment = bo->align; + bufmgr_gem->exec2_objects[index].offset = bo->offset64; + bufmgr_gem->exec2_objects[index].flags = bo_gem->kflags | flags; bufmgr_gem->exec2_objects[index].rsvd1 = 0; bufmgr_gem->exec2_objects[index].rsvd2 = 0; - if (need_fence) { - bufmgr_gem->exec2_objects[index].flags |= - EXEC_OBJECT_NEEDS_FENCE; - } + bufmgr_gem->exec_bos[index] = bo; bufmgr_gem->exec_count++; } @@ -479,9 +574,10 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) static void drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem, - drm_intel_bo_gem *bo_gem) + drm_intel_bo_gem *bo_gem, + unsigned int alignment) { - int size; + unsigned int size; assert(!bo_gem->used_as_reloc_target); @@ -493,7 +589,7 @@ drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem, */ size = bo_gem->bo.size; if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) { - int min_size; + unsigned int min_size; if (bufmgr_gem->has_relaxed_fencing) { if (bufmgr_gem->gen == 3) @@ -507,10 +603,10 @@ drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem, min_size = size; /* Account for worst-case alignment. 
*/ - size = 2 * min_size; + alignment = MAX2(alignment, min_size); } - bo_gem->reloc_tree_size = size; + bo_gem->reloc_tree_size = size + alignment; } static int @@ -550,12 +646,19 @@ drm_intel_gem_bo_busy(drm_intel_bo *bo) struct drm_i915_gem_busy busy; int ret; - VG_CLEAR(busy); + if (bo_gem->reusable && bo_gem->idle) + return false; + + memclear(busy); busy.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); - - return (ret == 0 && busy.busy); + if (ret == 0) { + bo_gem->idle = !busy.busy; + return busy.busy; + } else { + return false; + } } static int @@ -564,7 +667,7 @@ drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem, { struct drm_i915_gem_madvise madv; - VG_CLEAR(madv); + memclear(madv); madv.handle = bo_gem->gem_handle; madv.madv = state; madv.retained = 1; @@ -607,7 +710,8 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, unsigned long size, unsigned long flags, uint32_t tiling_mode, - unsigned long stride) + unsigned long stride, + unsigned int alignment) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; drm_intel_bo_gem *bo_gem; @@ -649,7 +753,9 @@ retry: bucket->head.prev, head); DRMLISTDEL(&bo_gem->head); alloc_from_cache = true; + bo_gem->bo.align = alignment; } else { + assert(alignment == 0); /* For non-render-target BOs (where we're probably * going to map it first thing in order to fill it * with data), check if the last BO in the cache is @@ -682,30 +788,39 @@ retry: } } } - pthread_mutex_unlock(&bufmgr_gem->lock); if (!alloc_from_cache) { struct drm_i915_gem_create create; bo_gem = calloc(1, sizeof(*bo_gem)); if (!bo_gem) - return NULL; + goto err; + + /* drm_intel_gem_bo_free calls DRMLISTDEL() for an uninitialized + list (vma_list), so better set the list head here */ + DRMINITLISTHEAD(&bo_gem->vma_list); bo_gem->bo.size = bo_size; - VG_CLEAR(create); + memclear(create); create.size = bo_size; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CREATE, &create); - bo_gem->gem_handle = create.handle; - bo_gem->bo.handle = bo_gem->gem_handle; if (ret != 0) { free(bo_gem); - return NULL; + goto err; } + + bo_gem->gem_handle = create.handle; + HASH_ADD(handle_hh, bufmgr_gem->handle_table, + gem_handle, sizeof(bo_gem->gem_handle), + bo_gem); + + bo_gem->bo.handle = bo_gem->gem_handle; bo_gem->bo.bufmgr = bufmgr; + bo_gem->bo.align = alignment; bo_gem->tiling_mode = I915_TILING_NONE; bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; @@ -713,13 +828,8 @@ retry: if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, tiling_mode, - stride)) { - drm_intel_gem_bo_free(&bo_gem->bo); - return NULL; - } - - DRMINITLISTHEAD(&bo_gem->name_list); - DRMINITLISTHEAD(&bo_gem->vma_list); + stride)) + goto err_free; } bo_gem->name = name; @@ -730,12 +840,19 @@ retry: bo_gem->has_error = false; bo_gem->reusable = true; - drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment); + pthread_mutex_unlock(&bufmgr_gem->lock); DBG("bo_create: buf %d (%s) %ldb\n", bo_gem->gem_handle, bo_gem->name, size); return &bo_gem->bo; + +err_free: + drm_intel_gem_bo_free(&bo_gem->bo); +err: + pthread_mutex_unlock(&bufmgr_gem->lock); + return NULL; } static drm_intel_bo * @@ -746,7 +863,8 @@ drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, { return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, BO_ALLOC_FOR_RENDER, - I915_TILING_NONE, 0); + I915_TILING_NONE, 0, + alignment); } static drm_intel_bo * @@ -756,7 +874,7 @@ 
drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, unsigned int alignment) { return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0, - I915_TILING_NONE, 0); + I915_TILING_NONE, 0, 0); } static drm_intel_bo * @@ -808,7 +926,151 @@ drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, stride = 0; return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags, - tiling, stride); + tiling, stride, 0); +} + +static drm_intel_bo * +drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, + const char *name, + void *addr, + uint32_t tiling_mode, + uint32_t stride, + unsigned long size, + unsigned long flags) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + drm_intel_bo_gem *bo_gem; + int ret; + struct drm_i915_gem_userptr userptr; + + /* Tiling with userptr surfaces is not supported + * on all hardware so refuse it for time being. + */ + if (tiling_mode != I915_TILING_NONE) + return NULL; + + bo_gem = calloc(1, sizeof(*bo_gem)); + if (!bo_gem) + return NULL; + + atomic_set(&bo_gem->refcount, 1); + DRMINITLISTHEAD(&bo_gem->vma_list); + + bo_gem->bo.size = size; + + memclear(userptr); + userptr.user_ptr = (__u64)((unsigned long)addr); + userptr.user_size = size; + userptr.flags = flags; + + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_USERPTR, + &userptr); + if (ret != 0) { + DBG("bo_create_userptr: " + "ioctl failed with user ptr %p size 0x%lx, " + "user flags 0x%lx\n", addr, size, flags); + free(bo_gem); + return NULL; + } + + pthread_mutex_lock(&bufmgr_gem->lock); + + bo_gem->gem_handle = userptr.handle; + bo_gem->bo.handle = bo_gem->gem_handle; + bo_gem->bo.bufmgr = bufmgr; + bo_gem->is_userptr = true; + bo_gem->bo.virtual = addr; + /* Save the address provided by user */ + bo_gem->user_virtual = addr; + bo_gem->tiling_mode = I915_TILING_NONE; + bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + bo_gem->stride = 0; + + HASH_ADD(handle_hh, bufmgr_gem->handle_table, + gem_handle, sizeof(bo_gem->gem_handle), + bo_gem); + + bo_gem->name = name; + bo_gem->validate_index = -1; + bo_gem->reloc_tree_fences = 0; + bo_gem->used_as_reloc_target = false; + bo_gem->has_error = false; + bo_gem->reusable = false; + + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); + pthread_mutex_unlock(&bufmgr_gem->lock); + + DBG("bo_create_userptr: " + "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n", + addr, bo_gem->gem_handle, bo_gem->name, + size, stride, tiling_mode); + + return &bo_gem->bo; +} + +static bool +has_userptr(drm_intel_bufmgr_gem *bufmgr_gem) +{ + int ret; + void *ptr; + long pgsz; + struct drm_i915_gem_userptr userptr; + + pgsz = sysconf(_SC_PAGESIZE); + assert(pgsz > 0); + + ret = posix_memalign(&ptr, pgsz, pgsz); + if (ret) { + DBG("Failed to get a page (%ld) for userptr detection!\n", + pgsz); + return false; + } + + memclear(userptr); + userptr.user_ptr = (__u64)(unsigned long)ptr; + userptr.user_size = pgsz; + +retry: + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); + if (ret) { + if (errno == ENODEV && userptr.flags == 0) { + userptr.flags = I915_USERPTR_UNSYNCHRONIZED; + goto retry; + } + free(ptr); + return false; + } + + /* We don't release the userptr bo here as we want to keep the + * kernel mm tracking alive for our lifetime. The first time we + * create a userptr object the kernel has to install a mmu_notifer + * which is a heavyweight operation (e.g. it requires taking all + * mm_locks and stop_machine()). 
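A caller-side sketch of the userptr path this probe enables (illustrative, not part of the patch; the bo name and error handling are placeholders). The wrapped allocation must be page-aligned and must outlive the bo:

#include <stdlib.h>
#include <unistd.h>
#include <intel_bufmgr.h>
#include <i915_drm.h>

static drm_intel_bo *
wrap_user_memory(drm_intel_bufmgr *bufmgr, unsigned long size)
{
	long pgsz = sysconf(_SC_PAGESIZE);
	void *ptr;

	/* userptr requires a page-aligned address and size */
	size = (size + pgsz - 1) & ~((unsigned long)pgsz - 1);
	if (posix_memalign(&ptr, pgsz, size) != 0)
		return NULL;

	/* tiling must be I915_TILING_NONE, as checked above */
	return drm_intel_bo_alloc_userptr(bufmgr, "wrapped", ptr,
					  I915_TILING_NONE, 0, size, 0);
}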
+ */ + + bufmgr_gem->userptr_active.ptr = ptr; + bufmgr_gem->userptr_active.handle = userptr.handle; + + return true; +} + +static drm_intel_bo * +check_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, + const char *name, + void *addr, + uint32_t tiling_mode, + uint32_t stride, + unsigned long size, + unsigned long flags) +{ + if (has_userptr((drm_intel_bufmgr_gem *)bufmgr)) + bufmgr->bo_alloc_userptr = drm_intel_gem_bo_alloc_userptr; + else + bufmgr->bo_alloc_userptr = NULL; + + return drm_intel_bo_alloc_userptr(bufmgr, name, addr, + tiling_mode, stride, size, flags); } /** @@ -827,7 +1089,6 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, int ret; struct drm_gem_open open_arg; struct drm_i915_gem_get_tiling get_tiling; - drmMMListHead *list; /* At the moment most applications only have a few named bo. * For instance, in a DRI client only the render buffers passed @@ -835,21 +1096,15 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, * alternating names for the front/back buffer a linear search * provides a sufficiently fast match. */ - for (list = bufmgr_gem->named.next; - list != &bufmgr_gem->named; - list = list->next) { - bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); - if (bo_gem->global_name == handle) { - drm_intel_gem_bo_reference(&bo_gem->bo); - return &bo_gem->bo; - } + pthread_mutex_lock(&bufmgr_gem->lock); + HASH_FIND(name_hh, bufmgr_gem->name_table, + &handle, sizeof(handle), bo_gem); + if (bo_gem) { + drm_intel_gem_bo_reference(&bo_gem->bo); + goto out; } - bo_gem = calloc(1, sizeof(*bo_gem)); - if (!bo_gem) - return NULL; - - VG_CLEAR(open_arg); + memclear(open_arg); open_arg.name = handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_OPEN, @@ -857,40 +1112,66 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, if (ret != 0) { DBG("Couldn't reference %s handle 0x%08x: %s\n", name, handle, strerror(errno)); - free(bo_gem); - return NULL; + bo_gem = NULL; + goto out; + } + /* Now see if someone has used a prime handle to get this + * object from the kernel before by looking through the list + * again for a matching gem_handle + */ + HASH_FIND(handle_hh, bufmgr_gem->handle_table, + &open_arg.handle, sizeof(open_arg.handle), bo_gem); + if (bo_gem) { + drm_intel_gem_bo_reference(&bo_gem->bo); + goto out; } + + bo_gem = calloc(1, sizeof(*bo_gem)); + if (!bo_gem) + goto out; + + atomic_set(&bo_gem->refcount, 1); + DRMINITLISTHEAD(&bo_gem->vma_list); + bo_gem->bo.size = open_arg.size; bo_gem->bo.offset = 0; + bo_gem->bo.offset64 = 0; bo_gem->bo.virtual = NULL; bo_gem->bo.bufmgr = bufmgr; bo_gem->name = name; - atomic_set(&bo_gem->refcount, 1); bo_gem->validate_index = -1; bo_gem->gem_handle = open_arg.handle; bo_gem->bo.handle = open_arg.handle; bo_gem->global_name = handle; bo_gem->reusable = false; - VG_CLEAR(get_tiling); + HASH_ADD(handle_hh, bufmgr_gem->handle_table, + gem_handle, sizeof(bo_gem->gem_handle), bo_gem); + HASH_ADD(name_hh, bufmgr_gem->name_table, + global_name, sizeof(bo_gem->global_name), bo_gem); + + memclear(get_tiling); get_tiling.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling); - if (ret != 0) { - drm_intel_gem_bo_unreference(&bo_gem->bo); - return NULL; - } + if (ret != 0) + goto err_unref; + bo_gem->tiling_mode = get_tiling.tiling_mode; bo_gem->swizzle_mode = get_tiling.swizzle_mode; /* XXX stride is unknown */ - drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); - - DRMINITLISTHEAD(&bo_gem->vma_list); - DRMLISTADDTAIL(&bo_gem->name_list, 
&bufmgr_gem->named); + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); +out: + pthread_mutex_unlock(&bufmgr_gem->lock); return &bo_gem->bo; + +err_unref: + drm_intel_gem_bo_free(&bo_gem->bo); + pthread_mutex_unlock(&bufmgr_gem->lock); + return NULL; } static void @@ -904,16 +1185,25 @@ drm_intel_gem_bo_free(drm_intel_bo *bo) DRMLISTDEL(&bo_gem->vma_list); if (bo_gem->mem_virtual) { VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0)); - munmap(bo_gem->mem_virtual, bo_gem->bo.size); + drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size); + bufmgr_gem->vma_count--; + } + if (bo_gem->wc_virtual) { + VG(VALGRIND_FREELIKE_BLOCK(bo_gem->wc_virtual, 0)); + drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size); bufmgr_gem->vma_count--; } if (bo_gem->gtt_virtual) { - munmap(bo_gem->gtt_virtual, bo_gem->bo.size); + drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size); bufmgr_gem->vma_count--; } + if (bo_gem->global_name) + HASH_DELETE(name_hh, bufmgr_gem->name_table, bo_gem); + HASH_DELETE(handle_hh, bufmgr_gem->handle_table, bo_gem); + /* Close this object */ - VG_CLEAR(close); + memclear(close); close.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); if (ret != 0) { @@ -932,6 +1222,9 @@ drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo) if (bo_gem->mem_virtual) VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size); + if (bo_gem->wc_virtual) + VALGRIND_MAKE_MEM_NOACCESS(bo_gem->wc_virtual, bo->size); + if (bo_gem->gtt_virtual) VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size); #endif @@ -992,12 +1285,17 @@ static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem) DRMLISTDELINIT(&bo_gem->vma_list); if (bo_gem->mem_virtual) { - munmap(bo_gem->mem_virtual, bo_gem->bo.size); + drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size); bo_gem->mem_virtual = NULL; bufmgr_gem->vma_count--; } + if (bo_gem->wc_virtual) { + drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size); + bo_gem->wc_virtual = NULL; + bufmgr_gem->vma_count--; + } if (bo_gem->gtt_virtual) { - munmap(bo_gem->gtt_virtual, bo_gem->bo.size); + drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size); bo_gem->gtt_virtual = NULL; bufmgr_gem->vma_count--; } @@ -1011,6 +1309,8 @@ static void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem, DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache); if (bo_gem->mem_virtual) bufmgr_gem->vma_count++; + if (bo_gem->wc_virtual) + bufmgr_gem->vma_count++; if (bo_gem->gtt_virtual) bufmgr_gem->vma_count++; drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); @@ -1023,6 +1323,8 @@ static void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem, DRMLISTDEL(&bo_gem->vma_list); if (bo_gem->mem_virtual) bufmgr_gem->vma_count--; + if (bo_gem->wc_virtual) + bufmgr_gem->vma_count--; if (bo_gem->gtt_virtual) bufmgr_gem->vma_count--; drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); @@ -1044,8 +1346,13 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) time); } } + for (i = 0; i < bo_gem->softpin_target_count; i++) + drm_intel_gem_bo_unreference_locked_timed(bo_gem->softpin_target[i], + time); + bo_gem->kflags = 0; bo_gem->reloc_count = 0; bo_gem->used_as_reloc_target = false; + bo_gem->softpin_target_count = 0; DBG("bo_unreference final: %d (%s)\n", bo_gem->gem_handle, bo_gem->name); @@ -1059,6 +1366,11 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) free(bo_gem->relocs); bo_gem->relocs = NULL; } + if (bo_gem->softpin_target) { + 
free(bo_gem->softpin_target); + bo_gem->softpin_target = NULL; + bo_gem->softpin_target_size = 0; + } /* Clear any left-over mappings */ if (bo_gem->map_count) { @@ -1068,8 +1380,6 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) drm_intel_gem_bo_mark_mmaps_incoherent(bo); } - DRMLISTDEL(&bo_gem->name_list); - bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size); /* Put the buffer into our internal cache for reuse if we can. */ if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL && @@ -1101,7 +1411,8 @@ static void drm_intel_gem_bo_unreference(drm_intel_bo *bo) drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; assert(atomic_read(&bo_gem->refcount) > 0); - if (atomic_dec_and_test(&bo_gem->refcount)) { + + if (atomic_add_unless(&bo_gem->refcount, -1, 1)) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; struct timespec time; @@ -1109,8 +1420,12 @@ static void drm_intel_gem_bo_unreference(drm_intel_bo *bo) clock_gettime(CLOCK_MONOTONIC, &time); pthread_mutex_lock(&bufmgr_gem->lock); - drm_intel_gem_bo_unreference_final(bo, time.tv_sec); - drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec); + + if (atomic_dec_and_test(&bo_gem->refcount)) { + drm_intel_gem_bo_unreference_final(bo, time.tv_sec); + drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec); + } + pthread_mutex_unlock(&bufmgr_gem->lock); } } @@ -1122,6 +1437,12 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) struct drm_i915_gem_set_domain set_domain; int ret; + if (bo_gem->is_userptr) { + /* Return the same user ptr */ + bo->virtual = bo_gem->user_virtual; + return 0; + } + pthread_mutex_lock(&bufmgr_gem->lock); if (bo_gem->map_count++ == 0) @@ -1133,9 +1454,8 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) DBG("bo_map: %d (%s), map_count=%d\n", bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); - VG_CLEAR(mmap_arg); + memclear(mmap_arg); mmap_arg.handle = bo_gem->gem_handle; - mmap_arg.offset = 0; mmap_arg.size = bo->size; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MMAP, @@ -1157,7 +1477,7 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) bo_gem->mem_virtual); bo->virtual = bo_gem->mem_virtual; - VG_CLEAR(set_domain); + memclear(set_domain); set_domain.handle = bo_gem->gem_handle; set_domain.read_domains = I915_GEM_DOMAIN_CPU; if (write_enable) @@ -1183,14 +1503,15 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) return 0; } -int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) +static int +map_gtt(drm_intel_bo *bo) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - struct drm_i915_gem_set_domain set_domain; int ret; - pthread_mutex_lock(&bufmgr_gem->lock); + if (bo_gem->is_userptr) + return -EINVAL; if (bo_gem->map_count++ == 0) drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); @@ -1202,7 +1523,7 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n", bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); - VG_CLEAR(mmap_arg); + memclear(mmap_arg); mmap_arg.handle = bo_gem->gem_handle; /* Get the fake offset back... 
*/ @@ -1217,14 +1538,13 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) strerror(errno)); if (--bo_gem->map_count == 0) drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); - pthread_mutex_unlock(&bufmgr_gem->lock); return ret; } /* and mmap it */ - bo_gem->gtt_virtual = mmap(0, bo->size, PROT_READ | PROT_WRITE, - MAP_SHARED, bufmgr_gem->fd, - mmap_arg.offset); + bo_gem->gtt_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE, + MAP_SHARED, bufmgr_gem->fd, + mmap_arg.offset); if (bo_gem->gtt_virtual == MAP_FAILED) { bo_gem->gtt_virtual = NULL; ret = -errno; @@ -1234,7 +1554,6 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) strerror(errno)); if (--bo_gem->map_count == 0) drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); - pthread_mutex_unlock(&bufmgr_gem->lock); return ret; } } @@ -1244,8 +1563,35 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, bo_gem->gtt_virtual); - /* Now move it to the GTT domain so that the CPU caches are flushed */ - VG_CLEAR(set_domain); + return 0; +} + +int +drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_set_domain set_domain; + int ret; + + pthread_mutex_lock(&bufmgr_gem->lock); + + ret = map_gtt(bo); + if (ret) { + pthread_mutex_unlock(&bufmgr_gem->lock); + return ret; + } + + /* Now move it to the GTT domain so that the GPU and CPU + * caches are flushed and the GPU isn't actively using the + * buffer. + * + * The pagefault handler does this domain change for us when + * it has unbound the BO from the GTT, but it's up to us to + * tell it when we're about to use things if we had done + * rendering and it still happens to be bound to the GTT. + */ + memclear(set_domain); set_domain.handle = bo_gem->gem_handle; set_domain.read_domains = I915_GEM_DOMAIN_GTT; set_domain.write_domain = I915_GEM_DOMAIN_GTT; @@ -1265,15 +1611,66 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) return 0; } -static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) +/** + * Performs a mapping of the buffer object like the normal GTT + * mapping, but avoids waiting for the GPU to be done reading from or + * rendering to the buffer. + * + * This is used in the implementation of GL_ARB_map_buffer_range: The + * user asks to create a buffer, then does a mapping, fills some + * space, runs a drawing command, then asks to map it again without + * synchronizing because it guarantees that it won't write over the + * data that the GPU is busy using (or, more specifically, that if it + * does write over the data, it acknowledges that rendering is + * undefined). + */ + +int +drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; +#ifdef HAVE_VALGRIND + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; +#endif + int ret; + + /* If the CPU cache isn't coherent with the GTT, then use a + * regular synchronized mapping. The problem is that we don't + * track where the buffer was last used on the CPU side in + * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so + * we would potentially corrupt the buffer even when the user + * does reasonable things. 
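A hedged sketch of the streaming pattern this comment describes (assumes the caller guarantees the written region is not in flight on the GPU):

#include <string.h>
#include <intel_bufmgr.h>

static int
append_data(drm_intel_bo *bo, unsigned long offset,
	    const void *data, size_t len)
{
	/* no stall on LLC parts; degrades to a synchronized GTT map otherwise */
	int ret = drm_intel_gem_bo_map_unsynchronized(bo);
	if (ret)
		return ret;

	memcpy((char *)bo->virtual + offset, data, len);

	/* unsynchronized maps are GTT maps underneath; unmap accordingly */
	return drm_intel_gem_bo_unmap_gtt(bo);
}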
+ */ + if (!bufmgr_gem->has_llc) + return drm_intel_gem_bo_map_gtt(bo); + + pthread_mutex_lock(&bufmgr_gem->lock); + + ret = map_gtt(bo); + if (ret == 0) { + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); + } + + pthread_mutex_unlock(&bufmgr_gem->lock); + + return ret; +} + +static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; int ret = 0; if (bo == NULL) return 0; + if (bo_gem->is_userptr) + return 0; + + bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + pthread_mutex_lock(&bufmgr_gem->lock); if (bo_gem->map_count <= 0) { @@ -1293,7 +1690,7 @@ static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) * Unlike GTT set domains, this only does work if the * buffer should be scanout-related. */ - VG_CLEAR(sw_finish); + memclear(sw_finish); sw_finish.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SW_FINISH, @@ -1304,7 +1701,7 @@ static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) } /* We need to unmap after every innovation as we cannot track - * an open vma for every bo as that will exhaasut the system + * an open vma for every bo as that will exhaust the system * limits and cause later failures. */ if (--bo_gem->map_count == 0) { @@ -1317,7 +1714,8 @@ static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) return ret; } -int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo) +int +drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo) { return drm_intel_gem_bo_unmap(bo); } @@ -1331,7 +1729,10 @@ drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, struct drm_i915_gem_pwrite pwrite; int ret; - VG_CLEAR(pwrite); + if (bo_gem->is_userptr) + return -EINVAL; + + memclear(pwrite); pwrite.handle = bo_gem->gem_handle; pwrite.offset = offset; pwrite.size = size; @@ -1356,7 +1757,7 @@ drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id; int ret; - VG_CLEAR(get_pipe_from_crtc_id); + memclear(get_pipe_from_crtc_id); get_pipe_from_crtc_id.crtc_id = crtc_id; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, @@ -1383,7 +1784,10 @@ drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, struct drm_i915_gem_pread pread; int ret; - VG_CLEAR(pread); + if (bo_gem->is_userptr) + return -EINVAL; + + memclear(pread); pread.handle = bo_gem->gem_handle; pread.offset = offset; pread.size = size; @@ -1409,27 +1813,83 @@ drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo) } /** - * Sets the object to the GTT read and possibly write domain, used by the X - * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt(). + * Waits on a BO for the given amount of time. * - * In combination with drm_intel_gem_bo_pin() and manual fence management, we - * can do tiled pixmaps this way. + * @bo: buffer object to wait for + * @timeout_ns: amount of time to wait in nanoseconds. + * If value is less than 0, an infinite wait will occur. + * + * Returns 0 if the wait was successful ie. the last batch referencing the + * object has completed within the allotted time. Otherwise some negative return + * value describes the error. Of particular interest is -ETIME when the wait has + * failed to yield the desired result. + * + * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows + * the operation to give up after a certain amount of time. 
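A usage sketch for the timed wait (illustrative; the 100 ms budget is arbitrary):

#include <stdbool.h>
#include <stdint.h>
#include <intel_bufmgr.h>

/* Returns true once the last batch referencing bo has completed within
 * the budget; a negative budget waits forever (see the kernel caveat
 * noted below). */
static bool
bo_idle_within(drm_intel_bo *bo, int64_t budget_ns)
{
	return drm_intel_gem_bo_wait(bo, budget_ns) == 0;
}

/* e.g. bo_idle_within(bo, 100 * 1000000ll) before a CPU readback */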
Another subtle + * difference is the internal locking semantics are different (this variant does + * not hold the lock for the duration of the wait). This makes the wait subject + * to a larger userspace race window. + * + * The implementation shall wait until the object is no longer actively + * referenced within a batch buffer at the time of the call. The wait will + * not guarantee that the buffer is re-issued via another thread, or an flinked + * handle. Userspace must make sure this race does not occur if such precision + * is important. + * + * Note that some kernels have broken the inifite wait for negative values + * promise, upgrade to latest stable kernels if this is the case. */ -void -drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable) +int +drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - struct drm_i915_gem_set_domain set_domain; + struct drm_i915_gem_wait wait; int ret; - VG_CLEAR(set_domain); - set_domain.handle = bo_gem->gem_handle; - set_domain.read_domains = I915_GEM_DOMAIN_GTT; - set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0; - ret = drmIoctl(bufmgr_gem->fd, - DRM_IOCTL_I915_GEM_SET_DOMAIN, - &set_domain); + if (!bufmgr_gem->has_wait_timeout) { + DBG("%s:%d: Timed wait is not supported. Falling back to " + "infinite wait\n", __FILE__, __LINE__); + if (timeout_ns) { + drm_intel_gem_bo_wait_rendering(bo); + return 0; + } else { + return drm_intel_gem_bo_busy(bo) ? -ETIME : 0; + } + } + + memclear(wait); + wait.bo_handle = bo_gem->gem_handle; + wait.timeout_ns = timeout_ns; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); + if (ret == -1) + return -errno; + + return ret; +} + +/** + * Sets the object to the GTT read and possibly write domain, used by the X + * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt(). + * + * In combination with drm_intel_gem_bo_pin() and manual fence management, we + * can do tiled pixmaps this way. + */ +void +drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_set_domain set_domain; + int ret; + + memclear(set_domain); + set_domain.handle = bo_gem->gem_handle; + set_domain.read_domains = I915_GEM_DOMAIN_GTT; + set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_SET_DOMAIN, + &set_domain); if (ret != 0) { DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", __FILE__, __LINE__, bo_gem->gem_handle, @@ -1442,7 +1902,8 @@ static void drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; - int i; + struct drm_gem_close close_bo; + int i, ret; free(bufmgr_gem->exec2_objects); free(bufmgr_gem->exec_objects); @@ -1465,6 +1926,18 @@ drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) } } + /* Release userptr bo kept hanging around for optimisation. */ + if (bufmgr_gem->userptr_active.ptr) { + memclear(close_bo); + close_bo.handle = bufmgr_gem->userptr_active.handle; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close_bo); + free(bufmgr_gem->userptr_active.ptr); + if (ret) + fprintf(stderr, + "Failed to release test userptr object! 
(%d) " + "i915 kernel driver may not be sane!\n", errno); + } + free(bufmgr); } @@ -1515,6 +1988,14 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, assert(offset <= bo->size - 4); assert((write_domain & (write_domain - 1)) == 0); + /* An object needing a fence is a tiled buffer, so it won't have + * relocs to other buffers. + */ + if (need_fence) { + assert(target_bo_gem->reloc_count == 0); + target_bo_gem->reloc_tree_fences = 1; + } + /* Make sure that we're not adding a reloc to something whose size has * already been accounted for. */ @@ -1522,21 +2003,8 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, if (target_bo_gem != bo_gem) { target_bo_gem->used_as_reloc_target = true; bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size; + bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences; } - /* An object needing a fence is a tiled buffer, so it won't have - * relocs to other buffers. - */ - if (need_fence) - target_bo_gem->reloc_tree_fences = 1; - bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences; - - bo_gem->relocs[bo_gem->reloc_count].offset = offset; - bo_gem->relocs[bo_gem->reloc_count].delta = target_offset; - bo_gem->relocs[bo_gem->reloc_count].target_handle = - target_bo_gem->gem_handle; - bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; - bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; - bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset; bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo; if (target_bo != bo) @@ -1547,21 +2015,81 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, else bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0; + bo_gem->relocs[bo_gem->reloc_count].offset = offset; + bo_gem->relocs[bo_gem->reloc_count].delta = target_offset; + bo_gem->relocs[bo_gem->reloc_count].target_handle = + target_bo_gem->gem_handle; + bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; + bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; + bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64; bo_gem->reloc_count++; return 0; } +static void +drm_intel_gem_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (enable) + bo_gem->kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; + else + bo_gem->kflags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS; +} + +static int +drm_intel_gem_bo_add_softpin_target(drm_intel_bo *bo, drm_intel_bo *target_bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; + if (bo_gem->has_error) + return -ENOMEM; + + if (target_bo_gem->has_error) { + bo_gem->has_error = true; + return -ENOMEM; + } + + if (!(target_bo_gem->kflags & EXEC_OBJECT_PINNED)) + return -EINVAL; + if (target_bo_gem == bo_gem) + return -EINVAL; + + if (bo_gem->softpin_target_count == bo_gem->softpin_target_size) { + int new_size = bo_gem->softpin_target_size * 2; + if (new_size == 0) + new_size = bufmgr_gem->max_relocs; + + bo_gem->softpin_target = realloc(bo_gem->softpin_target, new_size * + sizeof(drm_intel_bo *)); + if (!bo_gem->softpin_target) + return -ENOMEM; + + bo_gem->softpin_target_size = new_size; + } + bo_gem->softpin_target[bo_gem->softpin_target_count] = target_bo; + drm_intel_gem_bo_reference(target_bo); + bo_gem->softpin_target_count++; + + return 0; +} + static int drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, 
uint32_t offset, drm_intel_bo *target_bo, uint32_t target_offset, uint32_t read_domains, uint32_t write_domain) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *)target_bo; - return do_bo_emit_reloc(bo, offset, target_bo, target_offset, - read_domains, write_domain, - !bufmgr_gem->fenced_relocs); + if (target_bo_gem->kflags & EXEC_OBJECT_PINNED) + return drm_intel_gem_bo_add_softpin_target(bo, target_bo); + else + return do_bo_emit_reloc(bo, offset, target_bo, target_offset, + read_domains, write_domain, + !bufmgr_gem->fenced_relocs); } static int @@ -1594,10 +2122,13 @@ drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo) * * Any further drm_intel_bufmgr_check_aperture_space() queries * involving this buffer in the tree are undefined after this call. + * + * This also removes all softpinned targets being referenced by the BO. */ void drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start) { + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; int i; struct timespec time; @@ -1605,15 +2136,28 @@ drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start) clock_gettime(CLOCK_MONOTONIC, &time); assert(bo_gem->reloc_count >= start); + /* Unreference the cleared target buffers */ + pthread_mutex_lock(&bufmgr_gem->lock); + for (i = start; i < bo_gem->reloc_count; i++) { - if (bo_gem->reloc_target_info[i].bo != bo) { - drm_intel_gem_bo_unreference_locked_timed(bo_gem-> - reloc_target_info[i].bo, + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo; + if (&target_bo_gem->bo != bo) { + bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences; + drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec); } } bo_gem->reloc_count = start; + + for (i = 0; i < bo_gem->softpin_target_count; i++) { + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->softpin_target[i]; + drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec); + } + bo_gem->softpin_target_count = 0; + + pthread_mutex_unlock(&bufmgr_gem->lock); + } /** @@ -1652,7 +2196,7 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; int i; - if (bo_gem->relocs == NULL) + if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) return; for (i = 0; i < bo_gem->reloc_count; i++) { @@ -1673,6 +2217,17 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) /* Add the target to the validate list */ drm_intel_add_validate_buffer2(target_bo, need_fence); } + + for (i = 0; i < bo_gem->softpin_target_count; i++) { + drm_intel_bo *target_bo = bo_gem->softpin_target[i]; + + if (target_bo == bo) + continue; + + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + drm_intel_gem_bo_process_reloc2(target_bo); + drm_intel_add_validate_buffer2(target_bo, false); + } } @@ -1686,11 +2241,14 @@ drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem) drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; /* Update the buffer offset */ - if (bufmgr_gem->exec_objects[i].offset != bo->offset) { - DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", - bo_gem->gem_handle, bo_gem->name, bo->offset, - (unsigned long long)bufmgr_gem->exec_objects[i]. 
- offset); + if (bufmgr_gem->exec_objects[i].offset != bo->offset64) { + DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n", + bo_gem->gem_handle, bo_gem->name, + upper_32_bits(bo->offset64), + lower_32_bits(bo->offset64), + upper_32_bits(bufmgr_gem->exec_objects[i].offset), + lower_32_bits(bufmgr_gem->exec_objects[i].offset)); + bo->offset64 = bufmgr_gem->exec_objects[i].offset; bo->offset = bufmgr_gem->exec_objects[i].offset; } } @@ -1706,25 +2264,40 @@ drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; /* Update the buffer offset */ - if (bufmgr_gem->exec2_objects[i].offset != bo->offset) { - DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", - bo_gem->gem_handle, bo_gem->name, bo->offset, - (unsigned long long)bufmgr_gem->exec2_objects[i].offset); + if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) { + /* If we're seeing softpinned object here it means that the kernel + * has relocated our object... Indicating a programming error + */ + assert(!(bo_gem->kflags & EXEC_OBJECT_PINNED)); + DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n", + bo_gem->gem_handle, bo_gem->name, + upper_32_bits(bo->offset64), + lower_32_bits(bo->offset64), + upper_32_bits(bufmgr_gem->exec2_objects[i].offset), + lower_32_bits(bufmgr_gem->exec2_objects[i].offset)); + bo->offset64 = bufmgr_gem->exec2_objects[i].offset; bo->offset = bufmgr_gem->exec2_objects[i].offset; } } } +void +drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, + int x1, int y1, int width, int height, + enum aub_dump_bmp_format format, + int pitch, int offset) +{ +} + static int drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, drm_clip_rect_t * cliprects, int num_cliprects, int DR4) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; - drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; struct drm_i915_gem_execbuffer execbuf; int ret, i; - if (bo_gem->has_error) + if (to_bo_gem(bo)->has_error) return -ENOMEM; pthread_mutex_lock(&bufmgr_gem->lock); @@ -1736,7 +2309,7 @@ drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, */ drm_intel_add_validate_buffer(bo); - VG_CLEAR(execbuf); + memclear(execbuf); execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects; execbuf.buffer_count = bufmgr_gem->exec_count; execbuf.batch_start_offset = 0; @@ -1769,8 +2342,9 @@ drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, drm_intel_gem_dump_validation_list(bufmgr_gem); for (i = 0; i < bufmgr_gem->exec_count; i++) { - drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; - drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]); + + bo_gem->idle = false; /* Disconnect the buffer from the validate list */ bo_gem->validate_index = -1; @@ -1783,15 +2357,19 @@ drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, } static int -drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, - drm_clip_rect_t *cliprects, int num_cliprects, int DR4, - unsigned int flags) +do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx, + drm_clip_rect_t *cliprects, int num_cliprects, int DR4, + int in_fence, int *out_fence, + unsigned int flags) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; struct drm_i915_gem_execbuffer2 execbuf; int ret = 0; int i; + if (to_bo_gem(bo)->has_error) + return -ENOMEM; + switch (flags & 0x7) { default: return -EINVAL; @@ -1803,6 +2381,10 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, if (!bufmgr_gem->has_bsd) return -EINVAL; break; + case I915_EXEC_VEBOX: + if 
(!bufmgr_gem->has_vebox) + return -EINVAL; + break; case I915_EXEC_RENDER: case I915_EXEC_DEFAULT: break; @@ -1817,7 +2399,7 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, */ drm_intel_add_validate_buffer2(bo, 0); - VG_CLEAR(execbuf); + memclear(execbuf); execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects; execbuf.buffer_count = bufmgr_gem->exec_count; execbuf.batch_start_offset = 0; @@ -1827,14 +2409,25 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, execbuf.DR1 = 0; execbuf.DR4 = DR4; execbuf.flags = flags; - execbuf.rsvd1 = 0; + if (ctx == NULL) + i915_execbuffer2_set_context_id(execbuf, 0); + else + i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id); execbuf.rsvd2 = 0; + if (in_fence != -1) { + execbuf.rsvd2 = in_fence; + execbuf.flags |= I915_EXEC_FENCE_IN; + } + if (out_fence != NULL) { + *out_fence = -1; + execbuf.flags |= I915_EXEC_FENCE_OUT; + } if (bufmgr_gem->no_exec) goto skip_execution; ret = drmIoctl(bufmgr_gem->fd, - DRM_IOCTL_I915_GEM_EXECBUFFER2, + DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, &execbuf); if (ret != 0) { ret = -errno; @@ -1850,13 +2443,17 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, } drm_intel_update_buffer_offsets2(bufmgr_gem); + if (ret == 0 && out_fence != NULL) + *out_fence = execbuf.rsvd2 >> 32; + skip_execution: if (bufmgr_gem->bufmgr.debug) drm_intel_gem_dump_validation_list(bufmgr_gem); for (i = 0; i < bufmgr_gem->exec_count; i++) { - drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; - drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; + drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]); + + bo_gem->idle = false; /* Disconnect the buffer from the validate list */ bo_gem->validate_index = -1; @@ -1873,9 +2470,35 @@ drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used, drm_clip_rect_t *cliprects, int num_cliprects, int DR4) { - return drm_intel_gem_bo_mrb_exec2(bo, used, - cliprects, num_cliprects, DR4, - I915_EXEC_RENDER); + return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, + -1, NULL, I915_EXEC_RENDER); +} + +static int +drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, + drm_clip_rect_t *cliprects, int num_cliprects, int DR4, + unsigned int flags) +{ + return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, + -1, NULL, flags); +} + +int +drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx, + int used, unsigned int flags) +{ + return do_exec2(bo, used, ctx, NULL, 0, 0, -1, NULL, flags); +} + +int +drm_intel_gem_bo_fence_exec(drm_intel_bo *bo, + drm_intel_context *ctx, + int used, + int in_fence, + int *out_fence, + unsigned int flags) +{ + return do_exec2(bo, used, ctx, NULL, 0, 0, in_fence, out_fence, flags); } static int @@ -1886,7 +2509,7 @@ drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment) struct drm_i915_gem_pin pin; int ret; - VG_CLEAR(pin); + memclear(pin); pin.handle = bo_gem->gem_handle; pin.alignment = alignment; @@ -1896,6 +2519,7 @@ drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment) if (ret != 0) return -errno; + bo->offset64 = pin.offset; bo->offset = pin.offset; return 0; } @@ -1908,7 +2532,7 @@ drm_intel_gem_bo_unpin(drm_intel_bo *bo) struct drm_i915_gem_unpin unpin; int ret; - VG_CLEAR(unpin); + memclear(unpin); unpin.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin); @@ -1964,6 +2588,12 @@ drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; int ret; + /* Tiling with userptr surfaces is not supported + * on all hardware so 
refuse it for time being. + */ + if (bo_gem->is_userptr) + return -EINVAL; + /* Linear buffers have no stride. By ensuring that we only ever use * stride 0 with linear buffers, we simplify our code. */ @@ -1972,7 +2602,7 @@ drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride); if (ret == 0) - drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); *tiling_mode = bo_gem->tiling_mode; return ret; @@ -1990,26 +2620,139 @@ drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, } static int +drm_intel_gem_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + bo->offset64 = offset; + bo->offset = offset; + bo_gem->kflags |= EXEC_OBJECT_PINNED; + + return 0; +} + +drm_intel_bo * +drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + int ret; + uint32_t handle; + drm_intel_bo_gem *bo_gem; + struct drm_i915_gem_get_tiling get_tiling; + + pthread_mutex_lock(&bufmgr_gem->lock); + ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle); + if (ret) { + DBG("create_from_prime: failed to obtain handle from fd: %s\n", strerror(errno)); + pthread_mutex_unlock(&bufmgr_gem->lock); + return NULL; + } + + /* + * See if the kernel has already returned this buffer to us. Just as + * for named buffers, we must not create two bo's pointing at the same + * kernel object + */ + HASH_FIND(handle_hh, bufmgr_gem->handle_table, + &handle, sizeof(handle), bo_gem); + if (bo_gem) { + drm_intel_gem_bo_reference(&bo_gem->bo); + goto out; + } + + bo_gem = calloc(1, sizeof(*bo_gem)); + if (!bo_gem) + goto out; + + atomic_set(&bo_gem->refcount, 1); + DRMINITLISTHEAD(&bo_gem->vma_list); + + /* Determine size of bo. The fd-to-handle ioctl really should + * return the size, but it doesn't. If we have kernel 3.12 or + * later, we can lseek on the prime fd to get the size. Older + * kernels will just fail, in which case we fall back to the + * provided (estimated or guess size). 
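An export/import round-trip sketch (illustrative; `importer` would normally live in another process, with the fd carried over a unix socket):

#include <unistd.h>
#include <intel_bufmgr.h>

static drm_intel_bo *
share_bo(drm_intel_bo *bo, drm_intel_bufmgr *importer)
{
	drm_intel_bo *copy;
	int fd;

	if (drm_intel_bo_gem_export_to_prime(bo, &fd) != 0)
		return NULL;

	/* size 0 relies on the lseek(SEEK_END) probe described above;
	 * pass the real size if pre-3.12 kernels matter */
	copy = drm_intel_bo_gem_create_from_prime(importer, fd, 0);
	close(fd);	/* the imported bo holds its own reference */
	return copy;
}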
*/ + ret = lseek(prime_fd, 0, SEEK_END); + if (ret != -1) + bo_gem->bo.size = ret; + else + bo_gem->bo.size = size; + + bo_gem->bo.handle = handle; + bo_gem->bo.bufmgr = bufmgr; + + bo_gem->gem_handle = handle; + HASH_ADD(handle_hh, bufmgr_gem->handle_table, + gem_handle, sizeof(bo_gem->gem_handle), bo_gem); + + bo_gem->name = "prime"; + bo_gem->validate_index = -1; + bo_gem->reloc_tree_fences = 0; + bo_gem->used_as_reloc_target = false; + bo_gem->has_error = false; + bo_gem->reusable = false; + + memclear(get_tiling); + get_tiling.handle = bo_gem->gem_handle; + if (drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_GET_TILING, + &get_tiling)) + goto err; + + bo_gem->tiling_mode = get_tiling.tiling_mode; + bo_gem->swizzle_mode = get_tiling.swizzle_mode; + /* XXX stride is unknown */ + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); + +out: + pthread_mutex_unlock(&bufmgr_gem->lock); + return &bo_gem->bo; + +err: + drm_intel_gem_bo_free(&bo_gem->bo); + pthread_mutex_unlock(&bufmgr_gem->lock); + return NULL; +} + +int +drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle, + DRM_CLOEXEC, prime_fd) != 0) + return -errno; + + bo_gem->reusable = false; + + return 0; +} + +static int drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - int ret; if (!bo_gem->global_name) { struct drm_gem_flink flink; - VG_CLEAR(flink); + memclear(flink); flink.handle = bo_gem->gem_handle; - - ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink); - if (ret != 0) + if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink)) return -errno; - bo_gem->global_name = flink.name; - bo_gem->reusable = false; + pthread_mutex_lock(&bufmgr_gem->lock); + if (!bo_gem->global_name) { + bo_gem->global_name = flink.name; + bo_gem->reusable = false; - DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); + HASH_ADD(name_hh, bufmgr_gem->name_table, + global_name, sizeof(bo_gem->global_name), + bo_gem); + } + pthread_mutex_unlock(&bufmgr_gem->lock); } *name = bo_gem->global_name; @@ -2032,6 +2775,59 @@ drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr) } /** + * Disables implicit synchronisation before executing the bo + * + * This will cause rendering corruption unless you correctly manage explicit + * fences for all rendering involving this buffer - including use by others. + * Disabling the implicit serialisation is only required if that serialisation + * is too coarse (for example, you have split the buffer into many + * non-overlapping regions and are sharing the whole buffer between concurrent + * independent command streams). + * + * Note the kernel must advertise support via I915_PARAM_HAS_EXEC_ASYNC, + * which can be checked using drm_intel_bufmgr_can_disable_implicit_sync, + * or subsequent execbufs involving the bo will generate EINVAL. 
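A submission sketch with implicit sync disabled (illustrative; assumes `shared` is the buffer whose implicit serialisation is too coarse, and uses the fence-passing execbuf added earlier in this patch):

#include <errno.h>
#include <i915_drm.h>
#include <intel_bufmgr.h>

static int
exec_with_explicit_fence(drm_intel_bufmgr *bufmgr, drm_intel_context *ctx,
			 drm_intel_bo *batch, drm_intel_bo *shared,
			 int used, int in_fence, int *out_fence)
{
	if (!drm_intel_bufmgr_gem_can_disable_implicit_sync(bufmgr))
		return -ENODEV;	/* no I915_PARAM_HAS_EXEC_ASYNC */

	drm_intel_gem_bo_disable_implicit_sync(shared);

	return drm_intel_gem_bo_fence_exec(batch, ctx, used,
					   in_fence, out_fence,
					   I915_EXEC_RENDER);
}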
+ */ +void +drm_intel_gem_bo_disable_implicit_sync(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + bo_gem->kflags |= EXEC_OBJECT_ASYNC; +} + +/** + * Enables implicit synchronisation before executing the bo + * + * This is the default behaviour of the kernel, to wait upon prior writes + * completing on the object before rendering with it, or to wait for prior + * reads to complete before writing into the object. + * drm_intel_gem_bo_disable_implicit_sync() can stop this behaviour, telling + * the kernel never to insert a stall before using the object. Then this + * function can be used to restore the implicit sync before subsequent + * rendering. + */ +void +drm_intel_gem_bo_enable_implicit_sync(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + bo_gem->kflags &= ~EXEC_OBJECT_ASYNC; +} + +/** + * Query whether the kernel supports disabling of its implicit synchronisation + * before execbuf. See drm_intel_gem_bo_disable_implicit_sync() + */ +int +drm_intel_bufmgr_gem_can_disable_implicit_sync(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + + return bufmgr_gem->has_exec_async; +} + +/** * Enable use of fenced reloc type. * * New code should enable this to avoid unnecessary fence register @@ -2255,6 +3051,13 @@ _drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) return 1; } + for (i = 0; i< bo_gem->softpin_target_count; i++) { + if (bo_gem->softpin_target[i] == target_bo) + return 1; + if (_drm_intel_gem_bo_references(bo_gem->softpin_target[i], target_bo)) + return 1; + } + return 0; } @@ -2320,6 +3123,34 @@ drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit) drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); } +static int +parse_devid_override(const char *devid_override) +{ + static const struct { + const char *name; + int pci_id; + } name_map[] = { + { "brw", PCI_CHIP_I965_GM }, + { "g4x", PCI_CHIP_GM45_GM }, + { "ilk", PCI_CHIP_ILD_G }, + { "snb", PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS }, + { "ivb", PCI_CHIP_IVYBRIDGE_S_GT2 }, + { "hsw", PCI_CHIP_HASWELL_CRW_E_GT3 }, + { "byt", PCI_CHIP_VALLEYVIEW_3 }, + { "bdw", 0x1620 | BDW_ULX }, + { "skl", PCI_CHIP_SKYLAKE_DT_GT2 }, + { "kbl", PCI_CHIP_KABYLAKE_DT_GT2 }, + }; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(name_map); i++) { + if (!strcmp(name_map[i].name, devid_override)) + return name_map[i].pci_id; + } + + return strtod(devid_override, NULL); +} + /** * Get the PCI ID for the device. This can be overridden by setting the * INTEL_DEVID_OVERRIDE environment variable to the desired ID. @@ -2328,7 +3159,7 @@ static int get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem) { char *devid_override; - int devid; + int devid = 0; int ret; drm_i915_getparam_t gp; @@ -2336,11 +3167,11 @@ get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem) devid_override = getenv("INTEL_DEVID_OVERRIDE"); if (devid_override) { bufmgr_gem->no_exec = true; - return strtod(devid_override, NULL); + return parse_devid_override(devid_override); } } - VG_CLEAR(gp); + memclear(gp); gp.param = I915_PARAM_CHIPSET_ID; gp.value = &devid; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); @@ -2360,6 +3191,409 @@ drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr) } /** + * Sets the AUB filename. + * + * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump() + * for it to have any effect. 
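Since softpinned targets short-circuit relocation handling throughout this patch, a minimal sketch of pinning itself (assumes a kernel advertising I915_PARAM_HAS_EXEC_SOFTPIN and an address chosen by the caller's own allocator):

#include <stdint.h>
#include <intel_bufmgr.h>

static int
pin_bo_at(drm_intel_bo *bo, uint64_t gpu_addr)
{
	/* lift the 4 GiB limit on gen8+ before choosing high addresses */
	drm_intel_bo_use_48b_address_range(bo, 1);

	/* sets EXEC_OBJECT_PINNED; relocations to this bo become no-ops */
	return drm_intel_bo_set_softpin_offset(bo, gpu_addr);
}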
/** + * Sets the AUB filename. + * + * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump() + * for it to have any effect. + */ +void +drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr, + const char *filename) +{ +} + +/** + * Sets up AUB dumping. + * + * This is a trace file format that can be used with the simulator. + * Packets are emitted in a format somewhat like GPU command packets. + * You can set up a GTT and upload your objects into the referenced + * space, then send off batchbuffers and get BMPs out the other end. + */ +void +drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable) +{ + fprintf(stderr, "libdrm aub dumping is deprecated.\n\n" + "Use intel_aubdump from intel-gpu-tools instead. Install intel-gpu-tools,\n" + "then run (for example)\n\n" + "\t$ intel_aubdump --output=trace.aub glxgears -geometry 500x500\n\n" + "See the intel_aubdump man page for more details.\n"); +} + +drm_intel_context * +drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + struct drm_i915_gem_context_create create; + drm_intel_context *context = NULL; + int ret; + + context = calloc(1, sizeof(*context)); + if (!context) + return NULL; + + memclear(create); + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); + if (ret != 0) { + DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", + strerror(errno)); + free(context); + return NULL; + } + + context->ctx_id = create.ctx_id; + context->bufmgr = bufmgr; + + return context; +} + +int +drm_intel_gem_context_get_id(drm_intel_context *ctx, uint32_t *ctx_id) +{ + if (ctx == NULL) + return -EINVAL; + + *ctx_id = ctx->ctx_id; + + return 0; +} + +void +drm_intel_gem_context_destroy(drm_intel_context *ctx) +{ + drm_intel_bufmgr_gem *bufmgr_gem; + struct drm_i915_gem_context_destroy destroy; + int ret; + + if (ctx == NULL) + return; + + memclear(destroy); + + bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; + destroy.ctx_id = ctx->ctx_id; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, + &destroy); + if (ret != 0) + fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n", + strerror(errno)); + + free(ctx); +} + +int +drm_intel_get_reset_stats(drm_intel_context *ctx, + uint32_t *reset_count, + uint32_t *active, + uint32_t *pending) +{ + drm_intel_bufmgr_gem *bufmgr_gem; + struct drm_i915_reset_stats stats; + int ret; + + if (ctx == NULL) + return -EINVAL; + + memclear(stats); + + bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; + stats.ctx_id = ctx->ctx_id; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GET_RESET_STATS, + &stats); + if (ret == 0) { + if (reset_count != NULL) + *reset_count = stats.reset_count; + + if (active != NULL) + *active = stats.batch_active; + + if (pending != NULL) + *pending = stats.batch_pending; + } + + return ret; +} + +int +drm_intel_reg_read(drm_intel_bufmgr *bufmgr, + uint32_t offset, + uint64_t *result) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + struct drm_i915_reg_read reg_read; + int ret; + + memclear(reg_read); + reg_read.offset = offset; + + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, &reg_read); + + *result = reg_read.val; + return ret; +} + +int +drm_intel_get_subslice_total(int fd, unsigned int *subslice_total) +{ + drm_i915_getparam_t gp; + int ret; + + memclear(gp); + gp.value = (int*)subslice_total; + gp.param = I915_PARAM_SUBSLICE_TOTAL; + ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret) + return -errno; + + return 0; +}
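For orientation, a hedged sketch of the context lifecycle and per-context reset statistics exposed above (assumes an existing bufmgr on a kernel with context support):

	#include <stdio.h>
	#include "intel_bufmgr.h"

	/* Create a hardware context, poll its reset statistics, then clean up. */
	void check_resets(drm_intel_bufmgr *bufmgr)
	{
		uint32_t reset_count, active, pending;
		drm_intel_context *ctx = drm_intel_gem_context_create(bufmgr);

		if (ctx == NULL)
			return;

		if (drm_intel_get_reset_stats(ctx, &reset_count,
					      &active, &pending) == 0)
			fprintf(stderr, "resets=%u active=%u pending=%u\n",
				reset_count, active, pending);

		drm_intel_gem_context_destroy(ctx);
	}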
+ +int +drm_intel_get_eu_total(int fd, unsigned int *eu_total) +{ + drm_i915_getparam_t gp; + int ret; + + memclear(gp); + gp.value = (int*)eu_total; + gp.param = I915_PARAM_EU_TOTAL; + ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret) + return -errno; + + return 0; +} + +int +drm_intel_get_pooled_eu(int fd) +{ + drm_i915_getparam_t gp; + int ret = -1; + + memclear(gp); + gp.param = I915_PARAM_HAS_POOLED_EU; + gp.value = &ret; + if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp)) + return -errno; + + return ret; +} + +int +drm_intel_get_min_eu_in_pool(int fd) +{ + drm_i915_getparam_t gp; + int ret = -1; + + memclear(gp); + gp.param = I915_PARAM_MIN_EU_IN_POOL; + gp.value = &ret; + if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp)) + return -errno; + + return ret; +} + +/** + * Annotate the given bo for use in aub dumping. + * + * \param annotations is an array of drm_intel_aub_annotation objects + * describing the type of data in various sections of the bo. Each + * element of the array specifies the type and subtype of a section of + * the bo, and the past-the-end offset of that section. The elements + * of \c annotations must be sorted so that ending_offset is + * increasing. + * + * \param count is the number of elements in the \c annotations array. + * If \c count is zero, then \c annotations will not be dereferenced. + * + * Annotations are copied into a private data structure, so caller may + * re-use the memory pointed to by \c annotations after the call + * returns. + * + * Annotations are stored for the lifetime of the bo; to reset to the + * default state (no annotations), call this function with a \c count + * of zero. + */ +void +drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo, + drm_intel_aub_annotation *annotations, + unsigned count) +{ +} + +static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER; +static drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list }; + +static drm_intel_bufmgr_gem * +drm_intel_bufmgr_gem_find(int fd) +{ + drm_intel_bufmgr_gem *bufmgr_gem; + + DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) { + if (bufmgr_gem->fd == fd) { + atomic_inc(&bufmgr_gem->refcount); + return bufmgr_gem; + } + } + + return NULL; +} + +static void +drm_intel_bufmgr_gem_unref(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + + if (atomic_add_unless(&bufmgr_gem->refcount, -1, 1)) { + pthread_mutex_lock(&bufmgr_list_mutex); + + if (atomic_dec_and_test(&bufmgr_gem->refcount)) { + DRMLISTDEL(&bufmgr_gem->managers); + drm_intel_bufmgr_gem_destroy(bufmgr); + } + + pthread_mutex_unlock(&bufmgr_list_mutex); + } +}
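The GETPARAM helpers above expose GPU topology; a minimal caller might look like this (sketch: fd is assumed to be an open DRM device file descriptor, and each helper returns a negative errno when the kernel lacks the parameter):

	#include <stdio.h>
	#include "intel_bufmgr.h"

	void print_topology(int fd)
	{
		unsigned int subslices, eus;

		if (drm_intel_get_subslice_total(fd, &subslices) == 0 &&
		    drm_intel_get_eu_total(fd, &eus) == 0)
			printf("%u subslices, %u EUs\n", subslices, eus);

		/* Pooled EU is only reported on hardware that supports it;
		 * a value > 0 means the feature is enabled. */
		if (drm_intel_get_pooled_eu(fd) > 0)
			printf("min EUs in pool: %d\n",
			       drm_intel_get_min_eu_in_pool(fd));
	}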
+ +void *drm_intel_gem_bo_map__gtt(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (bo_gem->gtt_virtual) + return bo_gem->gtt_virtual; + + if (bo_gem->is_userptr) + return NULL; + + pthread_mutex_lock(&bufmgr_gem->lock); + if (bo_gem->gtt_virtual == NULL) { + struct drm_i915_gem_mmap_gtt mmap_arg; + void *ptr; + + DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n", + bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); + + if (bo_gem->map_count++ == 0) + drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); + + memclear(mmap_arg); + mmap_arg.handle = bo_gem->gem_handle; + + /* Get the fake offset back... */ + ptr = MAP_FAILED; + if (drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_MMAP_GTT, + &mmap_arg) == 0) { + /* and mmap it */ + ptr = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE, + MAP_SHARED, bufmgr_gem->fd, + mmap_arg.offset); + } + if (ptr == MAP_FAILED) { + if (--bo_gem->map_count == 0) + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + ptr = NULL; + } + + bo_gem->gtt_virtual = ptr; + } + pthread_mutex_unlock(&bufmgr_gem->lock); + + return bo_gem->gtt_virtual; +} + +void *drm_intel_gem_bo_map__cpu(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (bo_gem->mem_virtual) + return bo_gem->mem_virtual; + + if (bo_gem->is_userptr) { + /* Return the same user ptr */ + return bo_gem->user_virtual; + } + + pthread_mutex_lock(&bufmgr_gem->lock); + if (!bo_gem->mem_virtual) { + struct drm_i915_gem_mmap mmap_arg; + + if (bo_gem->map_count++ == 0) + drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); + + DBG("bo_map: %d (%s), map_count=%d\n", + bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); + + memclear(mmap_arg); + mmap_arg.handle = bo_gem->gem_handle; + mmap_arg.size = bo->size; + if (drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_MMAP, + &mmap_arg)) { + DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", + __FILE__, __LINE__, bo_gem->gem_handle, + bo_gem->name, strerror(errno)); + if (--bo_gem->map_count == 0) + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + } else { + VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); + bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; + } + } + pthread_mutex_unlock(&bufmgr_gem->lock); + + return bo_gem->mem_virtual; +} + +void *drm_intel_gem_bo_map__wc(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (bo_gem->wc_virtual) + return bo_gem->wc_virtual; + + if (bo_gem->is_userptr) + return NULL; + + pthread_mutex_lock(&bufmgr_gem->lock); + if (!bo_gem->wc_virtual) { + struct drm_i915_gem_mmap mmap_arg; + + if (bo_gem->map_count++ == 0) + drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); + + DBG("bo_map: %d (%s), map_count=%d\n", + bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); + + memclear(mmap_arg); + mmap_arg.handle = bo_gem->gem_handle; + mmap_arg.size = bo->size; + mmap_arg.flags = I915_MMAP_WC; + if (drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_MMAP, + &mmap_arg)) { + DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", + __FILE__, __LINE__, bo_gem->gem_handle, + bo_gem->name, strerror(errno)); + if (--bo_gem->map_count == 0) + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + } else { + VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); + bo_gem->wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; + } + } + pthread_mutex_unlock(&bufmgr_gem->lock); + + return bo_gem->wc_virtual; +}
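The three map helpers above return a cached CPU view of a bo with different memory types. A hedged sketch of choosing between them (the llc flag and the map_for_writing name are illustrative; llc would come from the caller's own device probing, e.g. the HAS_LLC parameter used later in init):

	#include "intel_bufmgr.h"

	/* Prefer a cacheable CPU map on LLC parts and a write-combining map
	 * elsewhere; fall back to a GTT map (which honours tiling) when the
	 * preferred mapping is unavailable. */
	void *map_for_writing(drm_intel_bo *bo, int llc)
	{
		void *ptr = llc ? drm_intel_gem_bo_map__cpu(bo)
				: drm_intel_gem_bo_map__wc(bo);

		if (ptr == NULL)
			ptr = drm_intel_gem_bo_map__gtt(bo);
		return ptr;
	}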
+ +/** * Initializes the GEM buffer manager, which uses the kernel to allocate, map, * and manage map buffer objects. * @@ -2374,17 +3608,26 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) int ret, tmp; bool exec2 = false; + pthread_mutex_lock(&bufmgr_list_mutex); + + bufmgr_gem = drm_intel_bufmgr_gem_find(fd); + if (bufmgr_gem) + goto exit; + bufmgr_gem = calloc(1, sizeof(*bufmgr_gem)); if (bufmgr_gem == NULL) - return NULL; + goto exit; bufmgr_gem->fd = fd; + atomic_set(&bufmgr_gem->refcount, 1); if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) { free(bufmgr_gem); - return NULL; + bufmgr_gem = NULL; + goto exit; } + memclear(aperture); ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); @@ -2414,19 +3657,29 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) else if (IS_GEN6(bufmgr_gem->pci_device)) bufmgr_gem->gen = 6; else if (IS_GEN7(bufmgr_gem->pci_device)) - bufmgr_gem->gen = 7; - else - assert(0); + bufmgr_gem->gen = 7; + else if (IS_GEN8(bufmgr_gem->pci_device)) + bufmgr_gem->gen = 8; + else if (IS_GEN9(bufmgr_gem->pci_device)) + bufmgr_gem->gen = 9; + else if (IS_GEN10(bufmgr_gem->pci_device)) + bufmgr_gem->gen = 10; + else { + free(bufmgr_gem); + bufmgr_gem = NULL; + goto exit; + } if (IS_GEN3(bufmgr_gem->pci_device) && bufmgr_gem->gtt_size > 256*1024*1024) { /* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't * be used for tiled blits. To simplify the accounting, just - * substract the unmappable part (fixed to 256MB on all known + * subtract the unmappable part (fixed to 256MB on all known * gen3 devices) if the kernel advertises it. */ bufmgr_gem->gtt_size -= 256*1024*1024; } + memclear(gp); gp.value = &tmp; gp.param = I915_PARAM_HAS_EXECBUF2; @@ -2446,16 +3699,35 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); bufmgr_gem->has_relaxed_fencing = ret == 0; + gp.param = I915_PARAM_HAS_EXEC_ASYNC; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + bufmgr_gem->has_exec_async = ret == 0; + + bufmgr_gem->bufmgr.bo_alloc_userptr = check_bo_alloc_userptr; + + gp.param = I915_PARAM_HAS_WAIT_TIMEOUT; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + bufmgr_gem->has_wait_timeout = ret == 0; + gp.param = I915_PARAM_HAS_LLC; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); - if (ret == -EINVAL) { + if (ret != 0) { /* Kernel does not support the HAS_LLC query; fall back to GPU * generation detection and assume that we have LLC on GEN6/7 */ bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) | IS_GEN7(bufmgr_gem->pci_device)); } else - bufmgr_gem->has_llc = ret == 0; + bufmgr_gem->has_llc = *gp.value; + + gp.param = I915_PARAM_HAS_VEBOX; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0); + + gp.param = I915_PARAM_HAS_EXEC_SOFTPIN; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret == 0 && *gp.value > 0) + bufmgr_gem->bufmgr.bo_set_softpin_offset = drm_intel_gem_bo_set_softpin_offset; if (bufmgr_gem->gen < 4) { gp.param = I915_PARAM_NUM_FENCES_AVAIL; @@ -2483,6 +3755,13 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) } } + if (bufmgr_gem->gen >= 8) { + gp.param = I915_PARAM_HAS_ALIASING_PPGTT; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret == 0 && *gp.value == 3) + bufmgr_gem->bufmgr.bo_use_48b_address_range = drm_intel_gem_bo_use_48b_address_range; + } + /* Let's go with one relocation per every 2 dwords (but round down a bit * since a power of two will mean an extra page allocation for the reloc * buffer).
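Nearly every capability probe in the init path above is the same I915_GETPARAM pattern; distilled as a sketch (the i915_getparam helper name is illustrative, not part of libdrm):

	#include <string.h>
	#include <xf86drm.h>
	#include "i915_drm.h"

	/* One capability = one GETPARAM ioctl writing into a caller-provided int. */
	static int i915_getparam(int fd, int param, int *value)
	{
		drm_i915_getparam_t gp;

		memset(&gp, 0, sizeof(gp));
		gp.param = param;
		gp.value = value;
		return drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
	}

	/* e.g. softpin support, mirroring the probe above:
	 *	int v = 0;
	 *	has_softpin = i915_getparam(fd, I915_PARAM_HAS_EXEC_SOFTPIN, &v) == 0
	 *		      && v > 0;
	 */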
@@ -2517,7 +3796,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec; bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy; bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise; - bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy; + bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_unref; bufmgr_gem->bufmgr.debug = 0; bufmgr_gem->bufmgr.check_aperture_space = drm_intel_gem_check_aperture_space; @@ -2527,11 +3806,15 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) drm_intel_gem_get_pipe_from_crtc_id; bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references; - DRMINITLISTHEAD(&bufmgr_gem->named); init_cache_buckets(bufmgr_gem); DRMINITLISTHEAD(&bufmgr_gem->vma_cache); bufmgr_gem->vma_max = -1; /* unlimited by default */ - return &bufmgr_gem->bufmgr; + DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list); + +exit: + pthread_mutex_unlock(&bufmgr_list_mutex); + + return bufmgr_gem != NULL ? &bufmgr_gem->bufmgr : NULL; }
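Since the manager is now refcounted and shared per fd, a second init call on the same device returns the first manager rather than a new one; a hedged usage sketch (fd is an open DRM device fd and 4096 an illustrative batch size):

	#include "intel_bufmgr.h"

	void shared_managers(int fd)
	{
		drm_intel_bufmgr *a = drm_intel_bufmgr_gem_init(fd, 4096);
		drm_intel_bufmgr *b = drm_intel_bufmgr_gem_init(fd, 4096);

		/* a == b: the second call found the existing manager in
		 * bufmgr_list via drm_intel_bufmgr_gem_find(). */

		drm_intel_bufmgr_destroy(b);	/* destroy now routes to unref */
		drm_intel_bufmgr_destroy(a);	/* last reference frees the manager */
	}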