X-Git-Url: http://git.osdn.net/view?a=blobdiff_plain;f=intel%2Fintel_bufmgr_gem.c;h=71f140f54dd0e611626124484c2b1214234492ba;hb=5c490bd6854a5b265aaa62ef62926c3ac97092c7;hp=7c947863cf5a902afb0b17cc4b558247fbd7778a;hpb=0f8da82500ec542e269092c0718479e25eaff5f6;p=android-x86%2Fexternal-libdrm.git

diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 7c947863..71f140f5 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -61,10 +61,10 @@
 #include "intel_bufmgr.h"
 #include "intel_bufmgr_priv.h"
 #include "intel_chipset.h"
-#include "intel_aub.h"
 #include "string.h"
 
 #include "i915_drm.h"
+#include "uthash.h"
 
 #ifdef HAVE_VALGRIND
 #include 
@@ -82,6 +82,23 @@
 } while (0)
 
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#define MAX2(A, B) ((A) > (B) ? (A) : (B))
+
+/**
+ * upper_32_bits - return bits 32-63 of a number
+ * @n: the number we're accessing
+ *
+ * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress
+ * the "right shift count >= width of type" warning when that quantity is
+ * 32-bits.
+ */
+#define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))
+
+/**
+ * lower_32_bits - return bits 0-31 of a number
+ * @n: the number we're accessing
+ */
+#define lower_32_bits(n) ((__u32)(n))
 
 typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
 
@@ -114,7 +131,9 @@ typedef struct _drm_intel_bufmgr_gem {
 	drmMMListHead managers;
 
-	drmMMListHead named;
+	drm_intel_bo_gem *name_table;
+	drm_intel_bo_gem *handle_table;
+
 	drmMMListHead vma_cache;
 	int vma_count, vma_open, vma_max;
 
@@ -130,11 +149,14 @@ typedef struct _drm_intel_bufmgr_gem {
 	unsigned int bo_reuse : 1;
 	unsigned int no_exec : 1;
 	unsigned int has_vebox : 1;
+	unsigned int has_exec_async : 1;
 	bool fenced_relocs;
 
-	char *aub_filename;
-	FILE *aub_file;
-	uint32_t aub_offset;
+	struct {
+		void *ptr;
+		uint32_t handle;
+	} userptr_active;
+
 } drm_intel_bufmgr_gem;
 
 #define DRM_INTEL_RELOC_FENCE (1<<0)
@@ -157,7 +179,9 @@ struct _drm_intel_bo_gem {
 	 * List contains both flink named and prime fd'd objects
 	 */
 	unsigned int global_name;
-	drmMMListHead name_list;
+
+	UT_hash_handle handle_hh;
+	UT_hash_handle name_hh;
 
 	/**
 	 * Index of the buffer within the validation list while preparing a
@@ -172,6 +196,8 @@ struct _drm_intel_bo_gem {
 	uint32_t swizzle_mode;
 	unsigned long stride;
 
+	unsigned long kflags;
+
 	time_t free_time;
 
 	/** Array passed to the DRM containing relocation information. */
@@ -182,10 +208,19 @@ struct _drm_intel_bo_gem {
 	drm_intel_reloc_target *reloc_target_info;
 	/** Number of entries in relocs */
 	int reloc_count;
+	/** Array of BOs that are referenced by this buffer and will be softpinned */
+	drm_intel_bo **softpin_target;
+	/** Number softpinned BOs that are referenced by this buffer */
+	int softpin_target_count;
+	/** Maximum amount of softpinned BOs that are referenced by this buffer */
+	int softpin_target_size;
+
 	/** Mapped address for the buffer, saved across map/unmap cycles */
 	void *mem_virtual;
 	/** GTT virtual address for the buffer, saved across map/unmap cycles */
 	void *gtt_virtual;
+	/** WC CPU address for the buffer, saved across map/unmap cycles */
+	void *wc_virtual;
 	/**
 	 * Virtual address of the buffer allocated by user, used for userptr
 	 * objects only.
@@ -224,7 +259,7 @@ struct _drm_intel_bo_gem {
 	 * Boolean of whether the GPU is definitely not accessing the buffer.
 	 *
 	 * This is only valid when reusable, since non-reusable
-	 * buffers are those that have been shared wth other
+	 * buffers are those that have been shared with other
 	 * processes, so we don't know their state.
*/ bool idle; @@ -248,13 +283,8 @@ struct _drm_intel_bo_gem { */ int reloc_tree_fences; - /** Flags that we may need to do the SW_FINSIH ioctl on unmap. */ + /** Flags that we may need to do the SW_FINISH ioctl on unmap. */ bool mapped_cpu_write; - - uint32_t aub_offset; - - drm_intel_aub_annotation *aub_annotations; - unsigned aub_annotation_count; }; static unsigned int @@ -279,6 +309,11 @@ static void drm_intel_gem_bo_unreference(drm_intel_bo *bo); static void drm_intel_gem_bo_free(drm_intel_bo *bo); +static inline drm_intel_bo_gem *to_bo_gem(drm_intel_bo *bo) +{ + return (drm_intel_bo_gem *)bo; +} + static unsigned long drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size, uint32_t *tiling_mode) @@ -387,8 +422,9 @@ drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - if (bo_gem->relocs == NULL) { - DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle, + if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) { + DBG("%2d: %d %s(%s)\n", i, bo_gem->gem_handle, + bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "", bo_gem->name); continue; } @@ -398,16 +434,36 @@ drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) drm_intel_bo_gem *target_gem = (drm_intel_bo_gem *) target_bo; - DBG("%2d: %d (%s)@0x%08llx -> " - "%d (%s)@0x%08lx + 0x%08x\n", + DBG("%2d: %d %s(%s)@0x%08x %08x -> " + "%d (%s)@0x%08x %08x + 0x%08x\n", i, - bo_gem->gem_handle, bo_gem->name, - (unsigned long long)bo_gem->relocs[j].offset, + bo_gem->gem_handle, + bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "", + bo_gem->name, + upper_32_bits(bo_gem->relocs[j].offset), + lower_32_bits(bo_gem->relocs[j].offset), target_gem->gem_handle, target_gem->name, - target_bo->offset64, + upper_32_bits(target_bo->offset64), + lower_32_bits(target_bo->offset64), bo_gem->relocs[j].delta); } + + for (j = 0; j < bo_gem->softpin_target_count; j++) { + drm_intel_bo *target_bo = bo_gem->softpin_target[j]; + drm_intel_bo_gem *target_gem = + (drm_intel_bo_gem *) target_bo; + DBG("%2d: %d %s(%s) -> " + "%d *(%s)@0x%08x %08x\n", + i, + bo_gem->gem_handle, + bo_gem->kflags & EXEC_OBJECT_PINNED ? 
"*" : "", + bo_gem->name, + target_gem->gem_handle, + target_gem->name, + upper_32_bits(target_bo->offset64), + lower_32_bits(target_bo->offset64)); + } } } @@ -459,7 +515,7 @@ drm_intel_add_validate_buffer(drm_intel_bo *bo) bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle; bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count; bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs; - bufmgr_gem->exec_objects[index].alignment = 0; + bufmgr_gem->exec_objects[index].alignment = bo->align; bufmgr_gem->exec_objects[index].offset = 0; bufmgr_gem->exec_bos[index] = bo; bufmgr_gem->exec_count++; @@ -471,11 +527,14 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; int index; + unsigned long flags; + + flags = 0; + if (need_fence) + flags |= EXEC_OBJECT_NEEDS_FENCE; if (bo_gem->validate_index != -1) { - if (need_fence) - bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= - EXEC_OBJECT_NEEDS_FENCE; + bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags; return; } @@ -501,16 +560,12 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle; bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count; bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs; - bufmgr_gem->exec2_objects[index].alignment = 0; - bufmgr_gem->exec2_objects[index].offset = 0; - bufmgr_gem->exec_bos[index] = bo; - bufmgr_gem->exec2_objects[index].flags = 0; + bufmgr_gem->exec2_objects[index].alignment = bo->align; + bufmgr_gem->exec2_objects[index].offset = bo->offset64; + bufmgr_gem->exec2_objects[index].flags = bo_gem->kflags | flags; bufmgr_gem->exec2_objects[index].rsvd1 = 0; bufmgr_gem->exec2_objects[index].rsvd2 = 0; - if (need_fence) { - bufmgr_gem->exec2_objects[index].flags |= - EXEC_OBJECT_NEEDS_FENCE; - } + bufmgr_gem->exec_bos[index] = bo; bufmgr_gem->exec_count++; } @@ -519,9 +574,10 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) static void drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem, - drm_intel_bo_gem *bo_gem) + drm_intel_bo_gem *bo_gem, + unsigned int alignment) { - int size; + unsigned int size; assert(!bo_gem->used_as_reloc_target); @@ -533,7 +589,7 @@ drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem, */ size = bo_gem->bo.size; if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) { - int min_size; + unsigned int min_size; if (bufmgr_gem->has_relaxed_fencing) { if (bufmgr_gem->gen == 3) @@ -547,10 +603,10 @@ drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem, min_size = size; /* Account for worst-case alignment. 
*/ - size = 2 * min_size; + alignment = MAX2(alignment, min_size); } - bo_gem->reloc_tree_size = size; + bo_gem->reloc_tree_size = size + alignment; } static int @@ -603,7 +659,6 @@ drm_intel_gem_bo_busy(drm_intel_bo *bo) } else { return false; } - return (ret == 0 && busy.busy); } static int @@ -655,7 +710,8 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, unsigned long size, unsigned long flags, uint32_t tiling_mode, - unsigned long stride) + unsigned long stride, + unsigned int alignment) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; drm_intel_bo_gem *bo_gem; @@ -697,7 +753,9 @@ retry: bucket->head.prev, head); DRMLISTDEL(&bo_gem->head); alloc_from_cache = true; + bo_gem->bo.align = alignment; } else { + assert(alignment == 0); /* For non-render-target BOs (where we're probably * going to map it first thing in order to fill it * with data), check if the last BO in the cache is @@ -730,14 +788,17 @@ retry: } } } - pthread_mutex_unlock(&bufmgr_gem->lock); if (!alloc_from_cache) { struct drm_i915_gem_create create; bo_gem = calloc(1, sizeof(*bo_gem)); if (!bo_gem) - return NULL; + goto err; + + /* drm_intel_gem_bo_free calls DRMLISTDEL() for an uninitialized + list (vma_list), so better set the list head here */ + DRMINITLISTHEAD(&bo_gem->vma_list); bo_gem->bo.size = bo_size; @@ -747,28 +808,28 @@ retry: ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CREATE, &create); - bo_gem->gem_handle = create.handle; - bo_gem->bo.handle = bo_gem->gem_handle; if (ret != 0) { free(bo_gem); - return NULL; + goto err; } + + bo_gem->gem_handle = create.handle; + HASH_ADD(handle_hh, bufmgr_gem->handle_table, + gem_handle, sizeof(bo_gem->gem_handle), + bo_gem); + + bo_gem->bo.handle = bo_gem->gem_handle; bo_gem->bo.bufmgr = bufmgr; + bo_gem->bo.align = alignment; bo_gem->tiling_mode = I915_TILING_NONE; bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; bo_gem->stride = 0; - /* drm_intel_gem_bo_free calls DRMLISTDEL() for an uninitialized - list (vma_list), so better set the list head here */ - DRMINITLISTHEAD(&bo_gem->name_list); - DRMINITLISTHEAD(&bo_gem->vma_list); if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, tiling_mode, - stride)) { - drm_intel_gem_bo_free(&bo_gem->bo); - return NULL; - } + stride)) + goto err_free; } bo_gem->name = name; @@ -778,15 +839,20 @@ retry: bo_gem->used_as_reloc_target = false; bo_gem->has_error = false; bo_gem->reusable = true; - bo_gem->aub_annotations = NULL; - bo_gem->aub_annotation_count = 0; - drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment); + pthread_mutex_unlock(&bufmgr_gem->lock); DBG("bo_create: buf %d (%s) %ldb\n", bo_gem->gem_handle, bo_gem->name, size); return &bo_gem->bo; + +err_free: + drm_intel_gem_bo_free(&bo_gem->bo); +err: + pthread_mutex_unlock(&bufmgr_gem->lock); + return NULL; } static drm_intel_bo * @@ -797,7 +863,8 @@ drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, { return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, BO_ALLOC_FOR_RENDER, - I915_TILING_NONE, 0); + I915_TILING_NONE, 0, + alignment); } static drm_intel_bo * @@ -807,7 +874,7 @@ drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, unsigned int alignment) { return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0, - I915_TILING_NONE, 0); + I915_TILING_NONE, 0, 0); } static drm_intel_bo * @@ -859,7 +926,7 @@ drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, stride = 0; return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 
flags, - tiling, stride); + tiling, stride, 0); } static drm_intel_bo * @@ -886,6 +953,9 @@ drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, if (!bo_gem) return NULL; + atomic_set(&bo_gem->refcount, 1); + DRMINITLISTHEAD(&bo_gem->vma_list); + bo_gem->bo.size = size; memclear(userptr); @@ -904,6 +974,8 @@ drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, return NULL; } + pthread_mutex_lock(&bufmgr_gem->lock); + bo_gem->gem_handle = userptr.handle; bo_gem->bo.handle = bo_gem->gem_handle; bo_gem->bo.bufmgr = bufmgr; @@ -915,18 +987,19 @@ drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; bo_gem->stride = 0; - DRMINITLISTHEAD(&bo_gem->name_list); - DRMINITLISTHEAD(&bo_gem->vma_list); + HASH_ADD(handle_hh, bufmgr_gem->handle_table, + gem_handle, sizeof(bo_gem->gem_handle), + bo_gem); bo_gem->name = name; - atomic_set(&bo_gem->refcount, 1); bo_gem->validate_index = -1; bo_gem->reloc_tree_fences = 0; bo_gem->used_as_reloc_target = false; bo_gem->has_error = false; bo_gem->reusable = false; - drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); + pthread_mutex_unlock(&bufmgr_gem->lock); DBG("bo_create_userptr: " "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n", @@ -943,7 +1016,6 @@ has_userptr(drm_intel_bufmgr_gem *bufmgr_gem) void *ptr; long pgsz; struct drm_i915_gem_userptr userptr; - struct drm_gem_close close_bo; pgsz = sysconf(_SC_PAGESIZE); assert(pgsz > 0); @@ -970,15 +1042,15 @@ retry: return false; } - memclear(close_bo); - close_bo.handle = userptr.handle; - ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close_bo); - free(ptr); - if (ret) { - fprintf(stderr, "Failed to release test userptr object! (%d) " - "i915 kernel driver may not be sane!\n", errno); - return false; - } + /* We don't release the userptr bo here as we want to keep the + * kernel mm tracking alive for our lifetime. The first time we + * create a userptr object the kernel has to install a mmu_notifer + * which is a heavyweight operation (e.g. it requires taking all + * mm_locks and stop_machine()). + */ + + bufmgr_gem->userptr_active.ptr = ptr; + bufmgr_gem->userptr_active.handle = userptr.handle; return true; } @@ -1017,7 +1089,6 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, int ret; struct drm_gem_open open_arg; struct drm_i915_gem_get_tiling get_tiling; - drmMMListHead *list; /* At the moment most applications only have a few named bo. * For instance, in a DRI client only the render buffers passed @@ -1026,15 +1097,11 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, * provides a sufficiently fast match. 
*/ pthread_mutex_lock(&bufmgr_gem->lock); - for (list = bufmgr_gem->named.next; - list != &bufmgr_gem->named; - list = list->next) { - bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); - if (bo_gem->global_name == handle) { - drm_intel_gem_bo_reference(&bo_gem->bo); - pthread_mutex_unlock(&bufmgr_gem->lock); - return &bo_gem->bo; - } + HASH_FIND(name_hh, bufmgr_gem->name_table, + &handle, sizeof(handle), bo_gem); + if (bo_gem) { + drm_intel_gem_bo_reference(&bo_gem->bo); + goto out; } memclear(open_arg); @@ -1045,29 +1112,26 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, if (ret != 0) { DBG("Couldn't reference %s handle 0x%08x: %s\n", name, handle, strerror(errno)); - pthread_mutex_unlock(&bufmgr_gem->lock); - return NULL; + bo_gem = NULL; + goto out; } /* Now see if someone has used a prime handle to get this * object from the kernel before by looking through the list * again for a matching gem_handle */ - for (list = bufmgr_gem->named.next; - list != &bufmgr_gem->named; - list = list->next) { - bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); - if (bo_gem->gem_handle == open_arg.handle) { - drm_intel_gem_bo_reference(&bo_gem->bo); - pthread_mutex_unlock(&bufmgr_gem->lock); - return &bo_gem->bo; - } + HASH_FIND(handle_hh, bufmgr_gem->handle_table, + &open_arg.handle, sizeof(open_arg.handle), bo_gem); + if (bo_gem) { + drm_intel_gem_bo_reference(&bo_gem->bo); + goto out; } bo_gem = calloc(1, sizeof(*bo_gem)); - if (!bo_gem) { - pthread_mutex_unlock(&bufmgr_gem->lock); - return NULL; - } + if (!bo_gem) + goto out; + + atomic_set(&bo_gem->refcount, 1); + DRMINITLISTHEAD(&bo_gem->vma_list); bo_gem->bo.size = open_arg.size; bo_gem->bo.offset = 0; @@ -1075,34 +1139,39 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, bo_gem->bo.virtual = NULL; bo_gem->bo.bufmgr = bufmgr; bo_gem->name = name; - atomic_set(&bo_gem->refcount, 1); bo_gem->validate_index = -1; bo_gem->gem_handle = open_arg.handle; bo_gem->bo.handle = open_arg.handle; bo_gem->global_name = handle; bo_gem->reusable = false; + HASH_ADD(handle_hh, bufmgr_gem->handle_table, + gem_handle, sizeof(bo_gem->gem_handle), bo_gem); + HASH_ADD(name_hh, bufmgr_gem->name_table, + global_name, sizeof(bo_gem->global_name), bo_gem); + memclear(get_tiling); get_tiling.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling); - if (ret != 0) { - drm_intel_gem_bo_unreference(&bo_gem->bo); - pthread_mutex_unlock(&bufmgr_gem->lock); - return NULL; - } + if (ret != 0) + goto err_unref; + bo_gem->tiling_mode = get_tiling.tiling_mode; bo_gem->swizzle_mode = get_tiling.swizzle_mode; /* XXX stride is unknown */ - drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); - - DRMINITLISTHEAD(&bo_gem->vma_list); - DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); - pthread_mutex_unlock(&bufmgr_gem->lock); + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); +out: + pthread_mutex_unlock(&bufmgr_gem->lock); return &bo_gem->bo; + +err_unref: + drm_intel_gem_bo_free(&bo_gem->bo); + pthread_mutex_unlock(&bufmgr_gem->lock); + return NULL; } static void @@ -1119,11 +1188,20 @@ drm_intel_gem_bo_free(drm_intel_bo *bo) drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size); bufmgr_gem->vma_count--; } + if (bo_gem->wc_virtual) { + VG(VALGRIND_FREELIKE_BLOCK(bo_gem->wc_virtual, 0)); + drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size); + bufmgr_gem->vma_count--; + } if (bo_gem->gtt_virtual) { 
drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size); bufmgr_gem->vma_count--; } + if (bo_gem->global_name) + HASH_DELETE(name_hh, bufmgr_gem->name_table, bo_gem); + HASH_DELETE(handle_hh, bufmgr_gem->handle_table, bo_gem); + /* Close this object */ memclear(close); close.handle = bo_gem->gem_handle; @@ -1132,7 +1210,6 @@ drm_intel_gem_bo_free(drm_intel_bo *bo) DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", bo_gem->gem_handle, bo_gem->name, strerror(errno)); } - free(bo_gem->aub_annotations); free(bo); } @@ -1145,6 +1222,9 @@ drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo) if (bo_gem->mem_virtual) VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size); + if (bo_gem->wc_virtual) + VALGRIND_MAKE_MEM_NOACCESS(bo_gem->wc_virtual, bo->size); + if (bo_gem->gtt_virtual) VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size); #endif @@ -1209,6 +1289,11 @@ static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem) bo_gem->mem_virtual = NULL; bufmgr_gem->vma_count--; } + if (bo_gem->wc_virtual) { + drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size); + bo_gem->wc_virtual = NULL; + bufmgr_gem->vma_count--; + } if (bo_gem->gtt_virtual) { drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size); bo_gem->gtt_virtual = NULL; @@ -1224,6 +1309,8 @@ static void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem, DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache); if (bo_gem->mem_virtual) bufmgr_gem->vma_count++; + if (bo_gem->wc_virtual) + bufmgr_gem->vma_count++; if (bo_gem->gtt_virtual) bufmgr_gem->vma_count++; drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); @@ -1236,6 +1323,8 @@ static void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem, DRMLISTDEL(&bo_gem->vma_list); if (bo_gem->mem_virtual) bufmgr_gem->vma_count--; + if (bo_gem->wc_virtual) + bufmgr_gem->vma_count--; if (bo_gem->gtt_virtual) bufmgr_gem->vma_count--; drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); @@ -1257,8 +1346,13 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) time); } } + for (i = 0; i < bo_gem->softpin_target_count; i++) + drm_intel_gem_bo_unreference_locked_timed(bo_gem->softpin_target[i], + time); + bo_gem->kflags = 0; bo_gem->reloc_count = 0; bo_gem->used_as_reloc_target = false; + bo_gem->softpin_target_count = 0; DBG("bo_unreference final: %d (%s)\n", bo_gem->gem_handle, bo_gem->name); @@ -1272,6 +1366,11 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) free(bo_gem->relocs); bo_gem->relocs = NULL; } + if (bo_gem->softpin_target) { + free(bo_gem->softpin_target); + bo_gem->softpin_target = NULL; + bo_gem->softpin_target_size = 0; + } /* Clear any left-over mappings */ if (bo_gem->map_count) { @@ -1281,8 +1380,6 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) drm_intel_gem_bo_mark_mmaps_incoherent(bo); } - DRMLISTDEL(&bo_gem->name_list); - bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size); /* Put the buffer into our internal cache for reuse if we can. */ if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL && @@ -1604,7 +1701,7 @@ static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) } /* We need to unmap after every innovation as we cannot track - * an open vma for every bo as that will exhaasut the system + * an open vma for every bo as that will exhaust the system * limits and cause later failures. 
*/ if (--bo_gem->map_count == 0) { @@ -1805,12 +1902,12 @@ static void drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; - int i; + struct drm_gem_close close_bo; + int i, ret; free(bufmgr_gem->exec2_objects); free(bufmgr_gem->exec_objects); free(bufmgr_gem->exec_bos); - free(bufmgr_gem->aub_filename); pthread_mutex_destroy(&bufmgr_gem->lock); @@ -1829,6 +1926,18 @@ drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) } } + /* Release userptr bo kept hanging around for optimisation. */ + if (bufmgr_gem->userptr_active.ptr) { + memclear(close_bo); + close_bo.handle = bufmgr_gem->userptr_active.handle; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close_bo); + free(bufmgr_gem->userptr_active.ptr); + if (ret) + fprintf(stderr, + "Failed to release test userptr object! (%d) " + "i915 kernel driver may not be sane!\n", errno); + } + free(bufmgr); } @@ -1897,14 +2006,6 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences; } - bo_gem->relocs[bo_gem->reloc_count].offset = offset; - bo_gem->relocs[bo_gem->reloc_count].delta = target_offset; - bo_gem->relocs[bo_gem->reloc_count].target_handle = - target_bo_gem->gem_handle; - bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; - bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; - bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64; - bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo; if (target_bo != bo) drm_intel_gem_bo_reference(target_bo); @@ -1914,21 +2015,81 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, else bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0; + bo_gem->relocs[bo_gem->reloc_count].offset = offset; + bo_gem->relocs[bo_gem->reloc_count].delta = target_offset; + bo_gem->relocs[bo_gem->reloc_count].target_handle = + target_bo_gem->gem_handle; + bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; + bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; + bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64; bo_gem->reloc_count++; return 0; } +static void +drm_intel_gem_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (enable) + bo_gem->kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; + else + bo_gem->kflags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS; +} + +static int +drm_intel_gem_bo_add_softpin_target(drm_intel_bo *bo, drm_intel_bo *target_bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; + if (bo_gem->has_error) + return -ENOMEM; + + if (target_bo_gem->has_error) { + bo_gem->has_error = true; + return -ENOMEM; + } + + if (!(target_bo_gem->kflags & EXEC_OBJECT_PINNED)) + return -EINVAL; + if (target_bo_gem == bo_gem) + return -EINVAL; + + if (bo_gem->softpin_target_count == bo_gem->softpin_target_size) { + int new_size = bo_gem->softpin_target_size * 2; + if (new_size == 0) + new_size = bufmgr_gem->max_relocs; + + bo_gem->softpin_target = realloc(bo_gem->softpin_target, new_size * + sizeof(drm_intel_bo *)); + if (!bo_gem->softpin_target) + return -ENOMEM; + + bo_gem->softpin_target_size = new_size; + } + bo_gem->softpin_target[bo_gem->softpin_target_count] = target_bo; + drm_intel_gem_bo_reference(target_bo); + bo_gem->softpin_target_count++; + + return 
0; +} + static int drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, drm_intel_bo *target_bo, uint32_t target_offset, uint32_t read_domains, uint32_t write_domain) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *)target_bo; - return do_bo_emit_reloc(bo, offset, target_bo, target_offset, - read_domains, write_domain, - !bufmgr_gem->fenced_relocs); + if (target_bo_gem->kflags & EXEC_OBJECT_PINNED) + return drm_intel_gem_bo_add_softpin_target(bo, target_bo); + else + return do_bo_emit_reloc(bo, offset, target_bo, target_offset, + read_domains, write_domain, + !bufmgr_gem->fenced_relocs); } static int @@ -1961,6 +2122,8 @@ drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo) * * Any further drm_intel_bufmgr_check_aperture_space() queries * involving this buffer in the tree are undefined after this call. + * + * This also removes all softpinned targets being referenced by the BO. */ void drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start) @@ -1987,6 +2150,12 @@ drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start) } bo_gem->reloc_count = start; + for (i = 0; i < bo_gem->softpin_target_count; i++) { + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->softpin_target[i]; + drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec); + } + bo_gem->softpin_target_count = 0; + pthread_mutex_unlock(&bufmgr_gem->lock); } @@ -2027,7 +2196,7 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; int i; - if (bo_gem->relocs == NULL) + if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) return; for (i = 0; i < bo_gem->reloc_count; i++) { @@ -2048,6 +2217,17 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) /* Add the target to the validate list */ drm_intel_add_validate_buffer2(target_bo, need_fence); } + + for (i = 0; i < bo_gem->softpin_target_count; i++) { + drm_intel_bo *target_bo = bo_gem->softpin_target[i]; + + if (target_bo == bo) + continue; + + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + drm_intel_gem_bo_process_reloc2(target_bo); + drm_intel_add_validate_buffer2(target_bo, false); + } } @@ -2062,10 +2242,12 @@ drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem) /* Update the buffer offset */ if (bufmgr_gem->exec_objects[i].offset != bo->offset64) { - DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", - bo_gem->gem_handle, bo_gem->name, bo->offset64, - (unsigned long long)bufmgr_gem->exec_objects[i]. - offset); + DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n", + bo_gem->gem_handle, bo_gem->name, + upper_32_bits(bo->offset64), + lower_32_bits(bo->offset64), + upper_32_bits(bufmgr_gem->exec_objects[i].offset), + lower_32_bits(bufmgr_gem->exec_objects[i].offset)); bo->offset64 = bufmgr_gem->exec_objects[i].offset; bo->offset = bufmgr_gem->exec_objects[i].offset; } @@ -2083,386 +2265,110 @@ drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) /* Update the buffer offset */ if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) { - DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", - bo_gem->gem_handle, bo_gem->name, bo->offset64, - (unsigned long long)bufmgr_gem->exec2_objects[i].offset); + /* If we're seeing softpinned object here it means that the kernel + * has relocated our object... 
Indicating a programming error + */ + assert(!(bo_gem->kflags & EXEC_OBJECT_PINNED)); + DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n", + bo_gem->gem_handle, bo_gem->name, + upper_32_bits(bo->offset64), + lower_32_bits(bo->offset64), + upper_32_bits(bufmgr_gem->exec2_objects[i].offset), + lower_32_bits(bufmgr_gem->exec2_objects[i].offset)); bo->offset64 = bufmgr_gem->exec2_objects[i].offset; bo->offset = bufmgr_gem->exec2_objects[i].offset; } } } -static void -aub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data) -{ - fwrite(&data, 1, 4, bufmgr_gem->aub_file); -} - -static void -aub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size) +void +drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, + int x1, int y1, int width, int height, + enum aub_dump_bmp_format format, + int pitch, int offset) { - fwrite(data, 1, size, bufmgr_gem->aub_file); } -static void -aub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size) +static int +drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, + drm_clip_rect_t * cliprects, int num_cliprects, int DR4) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; - drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - uint32_t *data; - unsigned int i; + struct drm_i915_gem_execbuffer execbuf; + int ret, i; - data = malloc(bo->size); - drm_intel_bo_get_subdata(bo, offset, size, data); + if (to_bo_gem(bo)->has_error) + return -ENOMEM; - /* Easy mode: write out bo with no relocations */ - if (!bo_gem->reloc_count) { - aub_out_data(bufmgr_gem, data, size); - free(data); - return; - } + pthread_mutex_lock(&bufmgr_gem->lock); + /* Update indices and set up the validate list. */ + drm_intel_gem_bo_process_reloc(bo); + + /* Add the batch buffer to the validation list. There are no + * relocations pointing to it. + */ + drm_intel_add_validate_buffer(bo); - /* Otherwise, handle the relocations while writing. */ - for (i = 0; i < size / 4; i++) { - int r; - for (r = 0; r < bo_gem->reloc_count; r++) { - struct drm_i915_gem_relocation_entry *reloc; - drm_intel_reloc_target *info; + memclear(execbuf); + execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects; + execbuf.buffer_count = bufmgr_gem->exec_count; + execbuf.batch_start_offset = 0; + execbuf.batch_len = used; + execbuf.cliprects_ptr = (uintptr_t) cliprects; + execbuf.num_cliprects = num_cliprects; + execbuf.DR1 = 0; + execbuf.DR4 = DR4; - reloc = &bo_gem->relocs[r]; - info = &bo_gem->reloc_target_info[r]; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_EXECBUFFER, + &execbuf); + if (ret != 0) { + ret = -errno; + if (errno == ENOSPC) { + DBG("Execbuffer fails to pin. " + "Estimate: %u. Actual: %u. 
Available: %u\n", + drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, + bufmgr_gem-> + exec_count), + drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, + bufmgr_gem-> + exec_count), + (unsigned int)bufmgr_gem->gtt_size); + } + } + drm_intel_update_buffer_offsets(bufmgr_gem); - if (reloc->offset == offset + i * 4) { - drm_intel_bo_gem *target_gem; - uint32_t val; + if (bufmgr_gem->bufmgr.debug) + drm_intel_gem_dump_validation_list(bufmgr_gem); - target_gem = (drm_intel_bo_gem *)info->bo; + for (i = 0; i < bufmgr_gem->exec_count; i++) { + drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]); - val = reloc->delta; - val += target_gem->aub_offset; + bo_gem->idle = false; - aub_out(bufmgr_gem, val); - data[i] = val; - break; - } - } - if (r == bo_gem->reloc_count) { - /* no relocation, just the data */ - aub_out(bufmgr_gem, data[i]); - } + /* Disconnect the buffer from the validate list */ + bo_gem->validate_index = -1; + bufmgr_gem->exec_bos[i] = NULL; } + bufmgr_gem->exec_count = 0; + pthread_mutex_unlock(&bufmgr_gem->lock); - free(data); + return ret; } -static void -aub_bo_get_address(drm_intel_bo *bo) +static int +do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx, + drm_clip_rect_t *cliprects, int num_cliprects, int DR4, + int in_fence, int *out_fence, + unsigned int flags) { - drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; - drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; + struct drm_i915_gem_execbuffer2 execbuf; + int ret = 0; + int i; - /* Give the object a graphics address in the AUB file. We - * don't just use the GEM object address because we do AUB - * dumping before execution -- we want to successfully log - * when the hardware might hang, and we might even want to aub - * capture for a driver trying to execute on a different - * generation of hardware by disabling the actual kernel exec - * call. - */ - bo_gem->aub_offset = bufmgr_gem->aub_offset; - bufmgr_gem->aub_offset += bo->size; - /* XXX: Handle aperture overflow. */ - assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024); -} - -static void -aub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype, - uint32_t offset, uint32_t size) -{ - drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; - drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - - aub_out(bufmgr_gem, - CMD_AUB_TRACE_HEADER_BLOCK | - ((bufmgr_gem->gen >= 8 ? 6 : 5) - 2)); - aub_out(bufmgr_gem, - AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE); - aub_out(bufmgr_gem, subtype); - aub_out(bufmgr_gem, bo_gem->aub_offset + offset); - aub_out(bufmgr_gem, size); - if (bufmgr_gem->gen >= 8) - aub_out(bufmgr_gem, 0); - aub_write_bo_data(bo, offset, size); -} - -/** - * Break up large objects into multiple writes. Otherwise a 128kb VBO - * would overflow the 16 bits of size field in the packet header and - * everything goes badly after that. 
- */ -static void -aub_write_large_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype, - uint32_t offset, uint32_t size) -{ - uint32_t block_size; - uint32_t sub_offset; - - for (sub_offset = 0; sub_offset < size; sub_offset += block_size) { - block_size = size - sub_offset; - - if (block_size > 8 * 4096) - block_size = 8 * 4096; - - aub_write_trace_block(bo, type, subtype, offset + sub_offset, - block_size); - } -} - -static void -aub_write_bo(drm_intel_bo *bo) -{ - drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - uint32_t offset = 0; - unsigned i; - - aub_bo_get_address(bo); - - /* Write out each annotated section separately. */ - for (i = 0; i < bo_gem->aub_annotation_count; ++i) { - drm_intel_aub_annotation *annotation = - &bo_gem->aub_annotations[i]; - uint32_t ending_offset = annotation->ending_offset; - if (ending_offset > bo->size) - ending_offset = bo->size; - if (ending_offset > offset) { - aub_write_large_trace_block(bo, annotation->type, - annotation->subtype, - offset, - ending_offset - offset); - offset = ending_offset; - } - } - - /* Write out any remaining unannotated data */ - if (offset < bo->size) { - aub_write_large_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0, - offset, bo->size - offset); - } -} - -/* - * Make a ringbuffer on fly and dump it - */ -static void -aub_build_dump_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem, - uint32_t batch_buffer, int ring_flag) -{ - uint32_t ringbuffer[4096]; - int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */ - int ring_count = 0; - - if (ring_flag == I915_EXEC_BSD) - ring = AUB_TRACE_TYPE_RING_PRB1; - else if (ring_flag == I915_EXEC_BLT) - ring = AUB_TRACE_TYPE_RING_PRB2; - - /* Make a ring buffer to execute our batchbuffer. */ - memset(ringbuffer, 0, sizeof(ringbuffer)); - if (bufmgr_gem->gen >= 8) { - ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START | (3 - 2); - ringbuffer[ring_count++] = batch_buffer; - ringbuffer[ring_count++] = 0; - } else { - ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START; - ringbuffer[ring_count++] = batch_buffer; - } - - /* Write out the ring. This appears to trigger execution of - * the ring in the simulator. - */ - aub_out(bufmgr_gem, - CMD_AUB_TRACE_HEADER_BLOCK | - ((bufmgr_gem->gen >= 8 ? 6 : 5) - 2)); - aub_out(bufmgr_gem, - AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE); - aub_out(bufmgr_gem, 0); /* general/surface subtype */ - aub_out(bufmgr_gem, bufmgr_gem->aub_offset); - aub_out(bufmgr_gem, ring_count * 4); - if (bufmgr_gem->gen >= 8) - aub_out(bufmgr_gem, 0); - - /* FIXME: Need some flush operations here? 
*/ - aub_out_data(bufmgr_gem, ringbuffer, ring_count * 4); - - /* Update offset pointer */ - bufmgr_gem->aub_offset += 4096; -} - -void -drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, - int x1, int y1, int width, int height, - enum aub_dump_bmp_format format, - int pitch, int offset) -{ - drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; - drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; - uint32_t cpp; - - switch (format) { - case AUB_DUMP_BMP_FORMAT_8BIT: - cpp = 1; - break; - case AUB_DUMP_BMP_FORMAT_ARGB_4444: - cpp = 2; - break; - case AUB_DUMP_BMP_FORMAT_ARGB_0888: - case AUB_DUMP_BMP_FORMAT_ARGB_8888: - cpp = 4; - break; - default: - printf("Unknown AUB dump format %d\n", format); - return; - } - - if (!bufmgr_gem->aub_file) - return; - - aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4); - aub_out(bufmgr_gem, (y1 << 16) | x1); - aub_out(bufmgr_gem, - (format << 24) | - (cpp << 19) | - pitch / 4); - aub_out(bufmgr_gem, (height << 16) | width); - aub_out(bufmgr_gem, bo_gem->aub_offset + offset); - aub_out(bufmgr_gem, - ((bo_gem->tiling_mode != I915_TILING_NONE) ? (1 << 2) : 0) | - ((bo_gem->tiling_mode == I915_TILING_Y) ? (1 << 3) : 0)); -} - -static void -aub_exec(drm_intel_bo *bo, int ring_flag, int used) -{ - drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; - drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - int i; - bool batch_buffer_needs_annotations; - - if (!bufmgr_gem->aub_file) - return; - - /* If batch buffer is not annotated, annotate it the best we - * can. - */ - batch_buffer_needs_annotations = bo_gem->aub_annotation_count == 0; - if (batch_buffer_needs_annotations) { - drm_intel_aub_annotation annotations[2] = { - { AUB_TRACE_TYPE_BATCH, 0, used }, - { AUB_TRACE_TYPE_NOTYPE, 0, bo->size } - }; - drm_intel_bufmgr_gem_set_aub_annotations(bo, annotations, 2); - } - - /* Write out all buffers to AUB memory */ - for (i = 0; i < bufmgr_gem->exec_count; i++) { - aub_write_bo(bufmgr_gem->exec_bos[i]); - } - - /* Remove any annotations we added */ - if (batch_buffer_needs_annotations) - drm_intel_bufmgr_gem_set_aub_annotations(bo, NULL, 0); - - /* Dump ring buffer */ - aub_build_dump_ringbuffer(bufmgr_gem, bo_gem->aub_offset, ring_flag); - - fflush(bufmgr_gem->aub_file); - - /* - * One frame has been dumped. So reset the aub_offset for the next frame. - * - * FIXME: Can we do this? - */ - bufmgr_gem->aub_offset = 0x10000; -} - -static int -drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, - drm_clip_rect_t * cliprects, int num_cliprects, int DR4) -{ - drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; - drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - struct drm_i915_gem_execbuffer execbuf; - int ret, i; - - if (bo_gem->has_error) - return -ENOMEM; - - pthread_mutex_lock(&bufmgr_gem->lock); - /* Update indices and set up the validate list. */ - drm_intel_gem_bo_process_reloc(bo); - - /* Add the batch buffer to the validation list. There are no - * relocations pointing to it. - */ - drm_intel_add_validate_buffer(bo); - - memclear(execbuf); - execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects; - execbuf.buffer_count = bufmgr_gem->exec_count; - execbuf.batch_start_offset = 0; - execbuf.batch_len = used; - execbuf.cliprects_ptr = (uintptr_t) cliprects; - execbuf.num_cliprects = num_cliprects; - execbuf.DR1 = 0; - execbuf.DR4 = DR4; - - ret = drmIoctl(bufmgr_gem->fd, - DRM_IOCTL_I915_GEM_EXECBUFFER, - &execbuf); - if (ret != 0) { - ret = -errno; - if (errno == ENOSPC) { - DBG("Execbuffer fails to pin. 
" - "Estimate: %u. Actual: %u. Available: %u\n", - drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, - bufmgr_gem-> - exec_count), - drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, - bufmgr_gem-> - exec_count), - (unsigned int)bufmgr_gem->gtt_size); - } - } - drm_intel_update_buffer_offsets(bufmgr_gem); - - if (bufmgr_gem->bufmgr.debug) - drm_intel_gem_dump_validation_list(bufmgr_gem); - - for (i = 0; i < bufmgr_gem->exec_count; i++) { - drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; - drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - - bo_gem->idle = false; - - /* Disconnect the buffer from the validate list */ - bo_gem->validate_index = -1; - bufmgr_gem->exec_bos[i] = NULL; - } - bufmgr_gem->exec_count = 0; - pthread_mutex_unlock(&bufmgr_gem->lock); - - return ret; -} - -static int -do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx, - drm_clip_rect_t *cliprects, int num_cliprects, int DR4, - unsigned int flags) -{ - drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; - struct drm_i915_gem_execbuffer2 execbuf; - int ret = 0; - int i; + if (to_bo_gem(bo)->has_error) + return -ENOMEM; switch (flags & 0x7) { default: @@ -2508,14 +2414,20 @@ do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx, else i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id); execbuf.rsvd2 = 0; - - aub_exec(bo, flags, used); + if (in_fence != -1) { + execbuf.rsvd2 = in_fence; + execbuf.flags |= I915_EXEC_FENCE_IN; + } + if (out_fence != NULL) { + *out_fence = -1; + execbuf.flags |= I915_EXEC_FENCE_OUT; + } if (bufmgr_gem->no_exec) goto skip_execution; ret = drmIoctl(bufmgr_gem->fd, - DRM_IOCTL_I915_GEM_EXECBUFFER2, + DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, &execbuf); if (ret != 0) { ret = -errno; @@ -2531,13 +2443,15 @@ do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx, } drm_intel_update_buffer_offsets2(bufmgr_gem); + if (ret == 0 && out_fence != NULL) + *out_fence = execbuf.rsvd2 >> 32; + skip_execution: if (bufmgr_gem->bufmgr.debug) drm_intel_gem_dump_validation_list(bufmgr_gem); for (i = 0; i < bufmgr_gem->exec_count; i++) { - drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; - drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; + drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]); bo_gem->idle = false; @@ -2557,7 +2471,7 @@ drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used, int DR4) { return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, - I915_EXEC_RENDER); + -1, NULL, I915_EXEC_RENDER); } static int @@ -2566,14 +2480,25 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, unsigned int flags) { return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, - flags); + -1, NULL, flags); } int drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx, int used, unsigned int flags) { - return do_exec2(bo, used, ctx, NULL, 0, 0, flags); + return do_exec2(bo, used, ctx, NULL, 0, 0, -1, NULL, flags); +} + +int +drm_intel_gem_bo_fence_exec(drm_intel_bo *bo, + drm_intel_context *ctx, + int used, + int in_fence, + int *out_fence, + unsigned int flags) +{ + return do_exec2(bo, used, ctx, NULL, 0, 0, in_fence, out_fence, flags); } static int @@ -2677,7 +2602,7 @@ drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride); if (ret == 0) - drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); *tiling_mode = bo_gem->tiling_mode; return ret; @@ -2694,6 +2619,18 @@ 
drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, return 0; } +static int +drm_intel_gem_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + bo->offset64 = offset; + bo->offset = offset; + bo_gem->kflags |= EXEC_OBJECT_PINNED; + + return 0; +} + drm_intel_bo * drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size) { @@ -2702,38 +2639,34 @@ drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int s uint32_t handle; drm_intel_bo_gem *bo_gem; struct drm_i915_gem_get_tiling get_tiling; - drmMMListHead *list; + pthread_mutex_lock(&bufmgr_gem->lock); ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle); + if (ret) { + DBG("create_from_prime: failed to obtain handle from fd: %s\n", strerror(errno)); + pthread_mutex_unlock(&bufmgr_gem->lock); + return NULL; + } /* * See if the kernel has already returned this buffer to us. Just as * for named buffers, we must not create two bo's pointing at the same * kernel object */ - pthread_mutex_lock(&bufmgr_gem->lock); - for (list = bufmgr_gem->named.next; - list != &bufmgr_gem->named; - list = list->next) { - bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); - if (bo_gem->gem_handle == handle) { - drm_intel_gem_bo_reference(&bo_gem->bo); - pthread_mutex_unlock(&bufmgr_gem->lock); - return &bo_gem->bo; - } - } - - if (ret) { - fprintf(stderr,"ret is %d %d\n", ret, errno); - pthread_mutex_unlock(&bufmgr_gem->lock); - return NULL; + HASH_FIND(handle_hh, bufmgr_gem->handle_table, + &handle, sizeof(handle), bo_gem); + if (bo_gem) { + drm_intel_gem_bo_reference(&bo_gem->bo); + goto out; } bo_gem = calloc(1, sizeof(*bo_gem)); - if (!bo_gem) { - pthread_mutex_unlock(&bufmgr_gem->lock); - return NULL; - } + if (!bo_gem) + goto out; + + atomic_set(&bo_gem->refcount, 1); + DRMINITLISTHEAD(&bo_gem->vma_list); + /* Determine size of bo. The fd-to-handle ioctl really should * return the size, but it doesn't. If we have kernel 3.12 or * later, we can lseek on the prime fd to get the size. 
Older @@ -2749,8 +2682,8 @@ drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int s bo_gem->bo.bufmgr = bufmgr; bo_gem->gem_handle = handle; - - atomic_set(&bo_gem->refcount, 1); + HASH_ADD(handle_hh, bufmgr_gem->handle_table, + gem_handle, sizeof(bo_gem->gem_handle), bo_gem); bo_gem->name = "prime"; bo_gem->validate_index = -1; @@ -2759,25 +2692,26 @@ drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int s bo_gem->has_error = false; bo_gem->reusable = false; - DRMINITLISTHEAD(&bo_gem->vma_list); - DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); - pthread_mutex_unlock(&bufmgr_gem->lock); - memclear(get_tiling); get_tiling.handle = bo_gem->gem_handle; - ret = drmIoctl(bufmgr_gem->fd, - DRM_IOCTL_I915_GEM_GET_TILING, - &get_tiling); - if (ret != 0) { - drm_intel_gem_bo_unreference(&bo_gem->bo); - return NULL; - } + if (drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_GET_TILING, + &get_tiling)) + goto err; + bo_gem->tiling_mode = get_tiling.tiling_mode; bo_gem->swizzle_mode = get_tiling.swizzle_mode; /* XXX stride is unknown */ - drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); +out: + pthread_mutex_unlock(&bufmgr_gem->lock); return &bo_gem->bo; + +err: + drm_intel_gem_bo_free(&bo_gem->bo); + pthread_mutex_unlock(&bufmgr_gem->lock); + return NULL; } int @@ -2786,11 +2720,6 @@ drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd) drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - pthread_mutex_lock(&bufmgr_gem->lock); - if (DRMLISTEMPTY(&bo_gem->name_list)) - DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); - pthread_mutex_unlock(&bufmgr_gem->lock); - if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle, DRM_CLOEXEC, prime_fd) != 0) return -errno; @@ -2805,27 +2734,24 @@ drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - int ret; if (!bo_gem->global_name) { struct drm_gem_flink flink; memclear(flink); flink.handle = bo_gem->gem_handle; + if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink)) + return -errno; pthread_mutex_lock(&bufmgr_gem->lock); + if (!bo_gem->global_name) { + bo_gem->global_name = flink.name; + bo_gem->reusable = false; - ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink); - if (ret != 0) { - pthread_mutex_unlock(&bufmgr_gem->lock); - return -errno; + HASH_ADD(name_hh, bufmgr_gem->name_table, + global_name, sizeof(bo_gem->global_name), + bo_gem); } - - bo_gem->global_name = flink.name; - bo_gem->reusable = false; - - if (DRMLISTEMPTY(&bo_gem->name_list)) - DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); pthread_mutex_unlock(&bufmgr_gem->lock); } @@ -2849,6 +2775,59 @@ drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr) } /** + * Disables implicit synchronisation before executing the bo + * + * This will cause rendering corruption unless you correctly manage explicit + * fences for all rendering involving this buffer - including use by others. + * Disabling the implicit serialisation is only required if that serialisation + * is too coarse (for example, you have split the buffer into many + * non-overlapping regions and are sharing the whole buffer between concurrent + * independent command streams). 
+ * + * Note the kernel must advertise support via I915_PARAM_HAS_EXEC_ASYNC, + * which can be checked using drm_intel_bufmgr_can_disable_implicit_sync, + * or subsequent execbufs involving the bo will generate EINVAL. + */ +void +drm_intel_gem_bo_disable_implicit_sync(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + bo_gem->kflags |= EXEC_OBJECT_ASYNC; +} + +/** + * Enables implicit synchronisation before executing the bo + * + * This is the default behaviour of the kernel, to wait upon prior writes + * completing on the object before rendering with it, or to wait for prior + * reads to complete before writing into the object. + * drm_intel_gem_bo_disable_implicit_sync() can stop this behaviour, telling + * the kernel never to insert a stall before using the object. Then this + * function can be used to restore the implicit sync before subsequent + * rendering. + */ +void +drm_intel_gem_bo_enable_implicit_sync(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + bo_gem->kflags &= ~EXEC_OBJECT_ASYNC; +} + +/** + * Query whether the kernel supports disabling of its implicit synchronisation + * before execbuf. See drm_intel_gem_bo_disable_implicit_sync() + */ +int +drm_intel_bufmgr_gem_can_disable_implicit_sync(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + + return bufmgr_gem->has_exec_async; +} + +/** * Enable use of fenced reloc type. * * New code should enable this to avoid unnecessary fence register @@ -3072,6 +3051,13 @@ _drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) return 1; } + for (i = 0; i< bo_gem->softpin_target_count; i++) { + if (bo_gem->softpin_target[i] == target_bo) + return 1; + if (_drm_intel_gem_bo_references(bo_gem->softpin_target[i], target_bo)) + return 1; + } + return 0; } @@ -3137,6 +3123,34 @@ drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit) drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); } +static int +parse_devid_override(const char *devid_override) +{ + static const struct { + const char *name; + int pci_id; + } name_map[] = { + { "brw", PCI_CHIP_I965_GM }, + { "g4x", PCI_CHIP_GM45_GM }, + { "ilk", PCI_CHIP_ILD_G }, + { "snb", PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS }, + { "ivb", PCI_CHIP_IVYBRIDGE_S_GT2 }, + { "hsw", PCI_CHIP_HASWELL_CRW_E_GT3 }, + { "byt", PCI_CHIP_VALLEYVIEW_3 }, + { "bdw", 0x1620 | BDW_ULX }, + { "skl", PCI_CHIP_SKYLAKE_DT_GT2 }, + { "kbl", PCI_CHIP_KABYLAKE_DT_GT2 }, + }; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(name_map); i++) { + if (!strcmp(name_map[i].name, devid_override)) + return name_map[i].pci_id; + } + + return strtod(devid_override, NULL); +} + /** * Get the PCI ID for the device. This can be overridden by setting the * INTEL_DEVID_OVERRIDE environment variable to the desired ID. 
@@ -3153,7 +3167,7 @@ get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem) devid_override = getenv("INTEL_DEVID_OVERRIDE"); if (devid_override) { bufmgr_gem->no_exec = true; - return strtod(devid_override, NULL); + return parse_devid_override(devid_override); } } @@ -3186,11 +3200,6 @@ void drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr, const char *filename) { - drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; - - free(bufmgr_gem->aub_filename); - if (filename) - bufmgr_gem->aub_filename = strdup(filename); } /** @@ -3204,58 +3213,11 @@ drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr, void drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable) { - drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; - int entry = 0x200003; - int i; - int gtt_size = 0x10000; - const char *filename; - - if (!enable) { - if (bufmgr_gem->aub_file) { - fclose(bufmgr_gem->aub_file); - bufmgr_gem->aub_file = NULL; - } - return; - } - - if (geteuid() != getuid()) - return; - - if (bufmgr_gem->aub_filename) - filename = bufmgr_gem->aub_filename; - else - filename = "intel.aub"; - bufmgr_gem->aub_file = fopen(filename, "w+"); - if (!bufmgr_gem->aub_file) - return; - - /* Start allocating objects from just after the GTT. */ - bufmgr_gem->aub_offset = gtt_size; - - /* Start with a (required) version packet. */ - aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2)); - aub_out(bufmgr_gem, - (4 << AUB_HEADER_MAJOR_SHIFT) | - (0 << AUB_HEADER_MINOR_SHIFT)); - for (i = 0; i < 8; i++) { - aub_out(bufmgr_gem, 0); /* app name */ - } - aub_out(bufmgr_gem, 0); /* timestamp */ - aub_out(bufmgr_gem, 0); /* timestamp */ - aub_out(bufmgr_gem, 0); /* comment len */ - - /* Set up the GTT. The max we can handle is 256M */ - aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | ((bufmgr_gem->gen >= 8 ? 6 : 5) - 2)); - /* Need to use GTT_ENTRY type for recent emulator */ - aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_GTT_ENTRY | 0 | AUB_TRACE_OP_DATA_WRITE); - aub_out(bufmgr_gem, 0); /* subtype */ - aub_out(bufmgr_gem, 0); /* offset */ - aub_out(bufmgr_gem, gtt_size); /* size */ - if (bufmgr_gem->gen >= 8) - aub_out(bufmgr_gem, 0); - for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) { - aub_out(bufmgr_gem, entry); - } + fprintf(stderr, "libdrm aub dumping is deprecated.\n\n" + "Use intel_aubdump from intel-gpu-tools instead. 
Install intel-gpu-tools,\n" + "then run (for example)\n\n" + "\t$ intel_aubdump --output=trace.aub glxgears -geometry 500x500\n\n" + "See the intel_aubdump man page for more details.\n"); } drm_intel_context * @@ -3285,6 +3247,17 @@ drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr) return context; } +int +drm_intel_gem_context_get_id(drm_intel_context *ctx, uint32_t *ctx_id) +{ + if (ctx == NULL) + return -EINVAL; + + *ctx_id = ctx->ctx_id; + + return 0; +} + void drm_intel_gem_context_destroy(drm_intel_context *ctx) { @@ -3392,6 +3365,36 @@ drm_intel_get_eu_total(int fd, unsigned int *eu_total) return 0; } +int +drm_intel_get_pooled_eu(int fd) +{ + drm_i915_getparam_t gp; + int ret = -1; + + memclear(gp); + gp.param = I915_PARAM_HAS_POOLED_EU; + gp.value = &ret; + if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp)) + return -errno; + + return ret; +} + +int +drm_intel_get_min_eu_in_pool(int fd) +{ + drm_i915_getparam_t gp; + int ret = -1; + + memclear(gp); + gp.param = I915_PARAM_MIN_EU_IN_POOL; + gp.value = &ret; + if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp)) + return -errno; + + return ret; +} + /** * Annotate the given bo for use in aub dumping. * @@ -3418,19 +3421,6 @@ drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo, drm_intel_aub_annotation *annotations, unsigned count) { - drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - unsigned size = sizeof(*annotations) * count; - drm_intel_aub_annotation *new_annotations = - count > 0 ? realloc(bo_gem->aub_annotations, size) : NULL; - if (new_annotations == NULL) { - free(bo_gem->aub_annotations); - bo_gem->aub_annotations = NULL; - bo_gem->aub_annotation_count = 0; - return; - } - memcpy(new_annotations, annotations, size); - bo_gem->aub_annotations = new_annotations; - bo_gem->aub_annotation_count = count; } static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -3468,6 +3458,141 @@ drm_intel_bufmgr_gem_unref(drm_intel_bufmgr *bufmgr) } } +void *drm_intel_gem_bo_map__gtt(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (bo_gem->gtt_virtual) + return bo_gem->gtt_virtual; + + if (bo_gem->is_userptr) + return NULL; + + pthread_mutex_lock(&bufmgr_gem->lock); + if (bo_gem->gtt_virtual == NULL) { + struct drm_i915_gem_mmap_gtt mmap_arg; + void *ptr; + + DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n", + bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); + + if (bo_gem->map_count++ == 0) + drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); + + memclear(mmap_arg); + mmap_arg.handle = bo_gem->gem_handle; + + /* Get the fake offset back... 
*/ + ptr = MAP_FAILED; + if (drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_MMAP_GTT, + &mmap_arg) == 0) { + /* and mmap it */ + ptr = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE, + MAP_SHARED, bufmgr_gem->fd, + mmap_arg.offset); + } + if (ptr == MAP_FAILED) { + if (--bo_gem->map_count == 0) + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + ptr = NULL; + } + + bo_gem->gtt_virtual = ptr; + } + pthread_mutex_unlock(&bufmgr_gem->lock); + + return bo_gem->gtt_virtual; +} + +void *drm_intel_gem_bo_map__cpu(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (bo_gem->mem_virtual) + return bo_gem->mem_virtual; + + if (bo_gem->is_userptr) { + /* Return the same user ptr */ + return bo_gem->user_virtual; + } + + pthread_mutex_lock(&bufmgr_gem->lock); + if (!bo_gem->mem_virtual) { + struct drm_i915_gem_mmap mmap_arg; + + if (bo_gem->map_count++ == 0) + drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); + + DBG("bo_map: %d (%s), map_count=%d\n", + bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); + + memclear(mmap_arg); + mmap_arg.handle = bo_gem->gem_handle; + mmap_arg.size = bo->size; + if (drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_MMAP, + &mmap_arg)) { + DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", + __FILE__, __LINE__, bo_gem->gem_handle, + bo_gem->name, strerror(errno)); + if (--bo_gem->map_count == 0) + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + } else { + VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); + bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; + } + } + pthread_mutex_unlock(&bufmgr_gem->lock); + + return bo_gem->mem_virtual; +} + +void *drm_intel_gem_bo_map__wc(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (bo_gem->wc_virtual) + return bo_gem->wc_virtual; + + if (bo_gem->is_userptr) + return NULL; + + pthread_mutex_lock(&bufmgr_gem->lock); + if (!bo_gem->wc_virtual) { + struct drm_i915_gem_mmap mmap_arg; + + if (bo_gem->map_count++ == 0) + drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); + + DBG("bo_map: %d (%s), map_count=%d\n", + bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); + + memclear(mmap_arg); + mmap_arg.handle = bo_gem->gem_handle; + mmap_arg.size = bo->size; + mmap_arg.flags = I915_MMAP_WC; + if (drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_MMAP, + &mmap_arg)) { + DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", + __FILE__, __LINE__, bo_gem->gem_handle, + bo_gem->name, strerror(errno)); + if (--bo_gem->map_count == 0) + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + } else { + VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); + bo_gem->wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; + } + } + pthread_mutex_unlock(&bufmgr_gem->lock); + + return bo_gem->wc_virtual; +} + /** * Initializes the GEM buffer manager, which uses the kernel to allocate, map, * and manage map buffer objections. @@ -3537,6 +3662,8 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) bufmgr_gem->gen = 8; else if (IS_GEN9(bufmgr_gem->pci_device)) bufmgr_gem->gen = 9; + else if (IS_GEN10(bufmgr_gem->pci_device)) + bufmgr_gem->gen = 10; else { free(bufmgr_gem); bufmgr_gem = NULL; @@ -3547,7 +3674,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) bufmgr_gem->gtt_size > 256*1024*1024) { /* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't * be used for tiled blits. 
To simplify the accounting, just - * substract the unmappable part (fixed to 256MB on all known + * subtract the unmappable part (fixed to 256MB on all known * gen3 devices) if the kernel advertises it. */ bufmgr_gem->gtt_size -= 256*1024*1024; } @@ -3572,6 +3699,10 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); bufmgr_gem->has_relaxed_fencing = ret == 0; + gp.param = I915_PARAM_HAS_EXEC_ASYNC; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + bufmgr_gem->has_exec_async = ret == 0; + bufmgr_gem->bufmgr.bo_alloc_userptr = check_bo_alloc_userptr; gp.param = I915_PARAM_HAS_WAIT_TIMEOUT; @@ -3593,6 +3724,11 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0); + gp.param = I915_PARAM_HAS_EXEC_SOFTPIN; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret == 0 && *gp.value > 0) + bufmgr_gem->bufmgr.bo_set_softpin_offset = drm_intel_gem_bo_set_softpin_offset; + if (bufmgr_gem->gen < 4) { gp.param = I915_PARAM_NUM_FENCES_AVAIL; gp.value = &bufmgr_gem->available_fences; @@ -3619,6 +3755,13 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) } } + if (bufmgr_gem->gen >= 8) { + gp.param = I915_PARAM_HAS_ALIASING_PPGTT; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret == 0 && *gp.value == 3) + bufmgr_gem->bufmgr.bo_use_48b_address_range = drm_intel_gem_bo_use_48b_address_range; + } + /* Let's go with one relocation per every 2 dwords (but round down a bit * since a power of two will mean an extra page allocation for the reloc * buffer). @@ -3663,7 +3806,6 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) drm_intel_gem_get_pipe_from_crtc_id; bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references; - DRMINITLISTHEAD(&bufmgr_gem->named); init_cache_buckets(bufmgr_gem); DRMINITLISTHEAD(&bufmgr_gem->vma_cache);