/**************************************************************************
 *
 * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/* Originally a fake version of the buffer manager so that we can
 * prototype the changes in a driver fairly quickly, has been fleshed
 * out to a fully functional interim solution.
 *
 * Basically wraps the old style memory management in the new
 * programming interface, but is more expressive and avoids many of
 * the bugs in the old texture manager.
 */

#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include <pthread.h>

#include <xf86drm.h>

#include "intel_bufmgr.h"
#include "intel_bufmgr_priv.h"
#include "drm.h"
#include "i915_drm.h"
#include "mm.h"
#include "libdrm_lists.h"

#define ALIGN(value, alignment) (((value) + (alignment) - 1) & ~((alignment) - 1))

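/* Note: ALIGN rounds a value up to the next multiple of a power-of-two
 * alignment, e.g. ALIGN(13, 8) == 16 and ALIGN(16, 8) == 16.  The bitmask
 * trick only works when "alignment" is a power of two.
 */
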
#define DBG(...) do {				\
    if (bufmgr_fake->bufmgr.debug)		\
        drmMsg(__VA_ARGS__);			\
} while (0)

#define BM_NO_BACKING_STORE 0x00000001
#define BM_NO_FENCE_SUBDATA 0x00000002
#define BM_PINNED           0x00000004

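/* As used in this file: BM_NO_BACKING_STORE means no system-memory copy is
 * kept (the owner is told via an invalidate callback instead),
 * BM_NO_FENCE_SUBDATA means mapping need not wait on outstanding fences, and
 * BM_PINNED marks static buffers that are permanently resident on the card.
 */
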
/* Wrapper around mm.c's mem_block, which understands that you must
 * wait for fences to expire before memory can be freed.  This is
 * specific to our use of memcpy for uploads - an upload that was
 * processed through the command queue wouldn't need to care about
 * fences.
 */
#define MAX_RELOCS 4096

struct fake_buffer_reloc {
    /** Buffer object that the relocation points at. */
    drm_intel_bo *target_buf;
    /** Offset of the relocation entry within reloc_buf. */
    uint32_t offset;
    /** Cached value of the offset when we last performed this relocation. */
    uint32_t last_target_offset;
    /** Value added to target_buf's offset to get the relocation entry. */
    uint32_t delta;
    /** Cache domains the target buffer is read into. */
    uint32_t read_domains;
    /** Cache domain the target buffer will have dirty cachelines in. */
    uint32_t write_domain;
};

struct block {
    struct block *next, *prev;
    struct mem_block *mem; /* BM_MEM_AGP */

    /**
     * Marks that the block is currently in the aperture and has yet to be
     * fenced.
     */
    unsigned on_hardware:1;
    /**
     * Marks that the block is currently fenced (being used by rendering) and
     * can't be freed until @fence is passed.
     */
    unsigned fenced:1;

    /** Fence cookie for the block. */
    unsigned fence; /* Split to read_fence, write_fence */

    drm_intel_bo *bo;
    void *virtual;
};

typedef struct _bufmgr_fake {
    drm_intel_bufmgr bufmgr;

    pthread_mutex_t lock;

    unsigned long low_offset;
    unsigned long size;
    void *virtual;

    struct mem_block *heap;

    unsigned buf_nr; /* for generating ids */

    /**
     * List of blocks which are currently in the GART but haven't been
     * fenced yet.
     */
    struct block on_hardware;
    /**
     * List of blocks which are in the GART and have an active fence on them.
     */
    struct block fenced;
    /**
     * List of blocks which have an expired fence and are ready to be evicted.
     */
    struct block lru;

    unsigned int last_fence;

    unsigned fail:1;
    unsigned need_fence:1;
    int thrashing;

    /**
     * Driver callback to emit a fence, returning the cookie.
     *
     * This allows the driver to hook in a replacement for the DRM usage in
     * bufmgr_fake.
     *
     * Currently, this also requires that a write flush be emitted before
     * emitting the fence, but this should change.
     */
    unsigned int (*fence_emit)(void *private);
    /** Driver callback to wait for a fence cookie to have passed. */
    void (*fence_wait)(unsigned int fence, void *private);
    void *fence_priv;

    /**
     * Driver callback to execute a buffer.
     *
     * This allows the driver to hook in a replacement for the DRM usage in
     * bufmgr_fake.
     */
    int (*exec)(drm_intel_bo *bo, unsigned int used, void *priv);
    void *exec_priv;

    /** Driver-supplied argument to driver callbacks */
    void *driver_priv;
    /* Pointer to kernel-updated sarea data for the last completed user irq */
    volatile int *last_dispatch;

    int fd;

    int debug;

    int performed_rendering;
} drm_intel_bufmgr_fake;

typedef struct _drm_intel_bo_fake {
    drm_intel_bo bo;

    unsigned id; /* debug only */
    const char *name;

    unsigned dirty:1;
    /** has the card written to this buffer - we may need to copy it back */
    unsigned card_dirty:1;
    unsigned int refcount;
    /* Flags may consist of any of the DRM_BO flags, plus
     * DRM_BO_NO_BACKING_STORE and BM_NO_FENCE_SUBDATA, which are the first two
     * driver private flags.
     */
    uint64_t flags;
    /** Cache domains the target buffer is read into. */
    uint32_t read_domains;
    /** Cache domain the target buffer will have dirty cachelines in. */
    uint32_t write_domain;

    unsigned int alignment;
    int is_static, validated;
    unsigned int map_count;

    /** relocation list */
    struct fake_buffer_reloc *relocs;
    int nr_relocs;
    /**
     * Total size of the target_bos of this buffer.
     *
     * Used for estimation in check_aperture.
     */
    unsigned int child_size;

    struct block *block;
    void *backing_store;
    void (*invalidate_cb)(drm_intel_bo *bo, void *ptr);
    void *invalidate_ptr;
} drm_intel_bo_fake;

static int clear_fenced(drm_intel_bufmgr_fake *bufmgr_fake,
                        unsigned int fence_cookie);

#define MAXFENCE 0x7fffffff

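/* Fence cookies are positive signed sequence numbers, so they wrap back
 * around after MAXFENCE.  Two cookies are only meaningfully comparable when
 * they lie within 2^24 of each other; FENCE_LTE below implements that
 * windowed comparison.
 */
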
static int FENCE_LTE(unsigned a, unsigned b)
{
    if (a == b)
        return 1;

    if (a < b && b - a < (1 << 24))
        return 1;

    if (a > b && MAXFENCE - a + b < (1 << 24))
        return 1;

    return 0;
}

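/* Example: FENCE_LTE(0x7ffffffd, 0x2) == 1, because the counter wrapped:
 * a > b and MAXFENCE - 0x7ffffffd + 0x2 == 4 < 2^24.  By contrast,
 * FENCE_LTE(0x2, 0x7ffffffd) == 0, since 0x7ffffffd - 0x2 exceeds the
 * 2^24 window.
 */
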
void drm_intel_bufmgr_fake_set_fence_callback(drm_intel_bufmgr *bufmgr,
                                              unsigned int (*emit)(void *priv),
                                              void (*wait)(unsigned int fence,
                                                           void *priv),
                                              void *priv)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bufmgr;

    bufmgr_fake->fence_emit = emit;
    bufmgr_fake->fence_wait = wait;
    bufmgr_fake->fence_priv = priv;
}

static unsigned int
_fence_emit_internal(drm_intel_bufmgr_fake *bufmgr_fake)
{
    struct drm_i915_irq_emit ie;
    int ret, seq = 1;

    if (bufmgr_fake->fence_emit != NULL) {
        seq = bufmgr_fake->fence_emit(bufmgr_fake->fence_priv);
        return seq;
    }

    ie.irq_seq = &seq;
    ret = drmCommandWriteRead(bufmgr_fake->fd, DRM_I915_IRQ_EMIT,
                              &ie, sizeof(ie));
    if (ret) {
        drmMsg("%s: drm_i915_irq_emit: %d\n", __FUNCTION__, ret);
        abort();
    }

    DBG("emit 0x%08x\n", seq);
    return seq;
}

static void
_fence_wait_internal(drm_intel_bufmgr_fake *bufmgr_fake, int seq)
{
    struct drm_i915_irq_wait iw;
    int hw_seq, busy_count = 0;
    int ret;
    int kernel_lied;

    if (bufmgr_fake->fence_wait != NULL) {
        bufmgr_fake->fence_wait(seq, bufmgr_fake->fence_priv);
        clear_fenced(bufmgr_fake, seq);
        return;
    }

    iw.irq_seq = seq;

    DBG("wait 0x%08x\n", iw.irq_seq);

    /* The kernel IRQ_WAIT implementation is all sorts of broken.
     * 1) It returns 1 to 0x7fffffff instead of using the full 32-bit unsigned
     *    range.
     * 2) It returns 0 if hw_seq >= seq, not seq - hw_seq < 0 on the 32-bit
     *    signed range.
     * 3) It waits if seq < hw_seq, not seq - hw_seq > 0 on the 32-bit
     *    signed range.
     * 4) It returns -EBUSY in 3 seconds even if the hardware is still
     *    successfully chewing through buffers.
     *
     * Assume that in userland we treat sequence numbers as ints, which makes
     * some of the comparisons convenient, since the sequence numbers are
     * all positive signed integers.
     *
     * From this we get several cases we need to handle.  Here's a timeline.
     * 0x2   0x7                                    0x7ffffff8   0x7ffffffd
     *   |      |                                        |            |
     * -------------------------------------------------------------------
     *
     * A) Normal wait for hw to catch up
     * hw_seq seq
     *   |      |
     * -------------------------------------------------------------------
     * seq - hw_seq = 5.  If we call IRQ_WAIT, it will wait for hw to catch up.
     *
     * B) Normal wait for a sequence number that's already passed.
     * seq    hw_seq
     *   |      |
     * -------------------------------------------------------------------
     * seq - hw_seq = -5.  If we call IRQ_WAIT, it returns 0 quickly.
     *
     * C) Hardware has already wrapped around ahead of us
     * hw_seq                                                         seq
     *   |                                                             |
     * -------------------------------------------------------------------
     * seq - hw_seq = 0x80000000 - 5.  If we called IRQ_WAIT, it would wait
     * for hw_seq >= seq, which may never occur.  Thus, we want to catch this
     * in userland and return 0.
     *
     * D) We've wrapped around ahead of the hardware.
     * seq                                                         hw_seq
     *   |                                                             |
     * -------------------------------------------------------------------
     * seq - hw_seq = -(0x80000000 - 5).  If we called IRQ_WAIT, it would return
     * 0 quickly because hw_seq >= seq, even though the hardware isn't caught up.
     * Thus, we need to catch this early return in userland and bother the
     * kernel until the hardware really does catch up.
     *
     * E) Hardware might wrap after we test in userland.
     *                                                         hw_seq seq
     *                                                            |    |
     * -------------------------------------------------------------------
     * seq - hw_seq = 5.  If we call IRQ_WAIT, it will likely see seq >= hw_seq
     * and wait.  However, suppose hw_seq wraps before we make it into the
     * kernel.  The kernel sees hw_seq >= seq and waits for 3 seconds then
     * returns -EBUSY.  This is case C).  We should catch this and then return
     * successfully.
     *
     * F) Hardware might take a long time on a buffer.
     * hw_seq seq
     *   |      |
     * -------------------------------------------------------------------
     * seq - hw_seq = 5.  If we call IRQ_WAIT, if sequence 2 through 5 take too
     * long, it will return -EBUSY.  Batchbuffers in the gltestperf demo were
     * seen to take up to 7 seconds.  We should catch early -EBUSY return
     * and keep trying.
     */

    do {
        /* Keep a copy of last_dispatch so that if the wait -EBUSYs because the
         * hardware didn't catch up in 3 seconds, we can see if it at least made
         * progress and retry.
         */
        hw_seq = *bufmgr_fake->last_dispatch;

        /* Catch case C */
        if (seq - hw_seq > 0x40000000)
            return;

        ret = drmCommandWrite(bufmgr_fake->fd, DRM_I915_IRQ_WAIT,
                              &iw, sizeof(iw));
        /* Catch case D */
        kernel_lied = (ret == 0) && (seq - *bufmgr_fake->last_dispatch <
                                     -0x40000000);

        /* Catch case E */
        if (ret == -EBUSY && (seq - *bufmgr_fake->last_dispatch > 0x40000000))
            ret = 0;

        /* Catch case F: Allow up to 15 seconds chewing on one buffer. */
        if ((ret == -EBUSY) && (hw_seq != *bufmgr_fake->last_dispatch))
            busy_count = 0;
        else
            busy_count++;
    } while (kernel_lied || ret == -EAGAIN || ret == -EINTR ||
             (ret == -EBUSY && busy_count < 5));

    if (ret != 0) {
        drmMsg("%s:%d: Error waiting for fence: %s.\n", __FILE__, __LINE__,
               strerror(-ret));
        abort();
    }
    clear_fenced(bufmgr_fake, seq);
}

static int
_fence_test(drm_intel_bufmgr_fake *bufmgr_fake, unsigned fence)
{
    /* Slight problem with wrap-around:
     */
    return fence == 0 || FENCE_LTE(fence, bufmgr_fake->last_fence);
}

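/* Note: a fence cookie of 0 means "never fenced", so it always tests as
 * passed; freshly allocated blocks start out with fence == 0.
 */
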
/**
 * Allocate a memory manager block for the buffer.
 */
static int
alloc_block(drm_intel_bo *bo)
{
    drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *)bo;
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bo->bufmgr;
    struct block *block = (struct block *)calloc(sizeof *block, 1);
    unsigned int align_log2 = ffs(bo_fake->alignment) - 1;
    unsigned int sz;

    if (!block)
        return 0;

    sz = (bo->size + bo_fake->alignment - 1) & ~(bo_fake->alignment - 1);

    block->mem = mmAllocMem(bufmgr_fake->heap, sz, align_log2, 0);
    if (!block->mem) {
        free(block);
        return 0;
    }

    DRMINITLISTHEAD(block);

    /* Insert at head or at tail???
     */
    DRMLISTADDTAIL(block, &bufmgr_fake->lru);

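    /* Translate the block's offset within the aperture heap into a CPU
     * pointer inside our mapping of that aperture range.
     */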
    block->virtual = (uint8_t *)bufmgr_fake->virtual +
        block->mem->ofs - bufmgr_fake->low_offset;
    block->bo = bo;

    bo_fake->block = block;

    return 1;
}

/* Release the card storage associated with buf:
 */
static void free_block(drm_intel_bufmgr_fake *bufmgr_fake, struct block *block)
{
    drm_intel_bo_fake *bo_fake;

    if (!block)
        return;

    DBG("free block %p %08x %d %d\n", block, block->mem->ofs,
        block->on_hardware, block->fenced);

    bo_fake = (drm_intel_bo_fake *)block->bo;
    if (!(bo_fake->flags & (BM_PINNED | BM_NO_BACKING_STORE)) &&
        (bo_fake->card_dirty == 1)) {
        memcpy(bo_fake->backing_store, block->virtual, block->bo->size);
        bo_fake->card_dirty = 0;
        bo_fake->dirty = 1;
    }

    if (block->on_hardware) {
        block->bo = NULL;
    }
    else if (block->fenced) {
        block->bo = NULL;
    }
    else {
        DBG("      - free immediately\n");
        DRMLISTDEL(block);

        mmFreeMem(block->mem);
        free(block);
    }
}

static void
alloc_backing_store(drm_intel_bo *bo)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bo->bufmgr;
    drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *)bo;
    assert(!bo_fake->backing_store);
    assert(!(bo_fake->flags & (BM_PINNED|BM_NO_BACKING_STORE)));

    bo_fake->backing_store = malloc(bo->size);

    DBG("alloc_backing - buf %d %p %d\n", bo_fake->id,
        bo_fake->backing_store, bo->size);
    assert(bo_fake->backing_store);
}

static void
free_backing_store(drm_intel_bo *bo)
{
    drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *)bo;

    if (bo_fake->backing_store) {
        assert(!(bo_fake->flags & (BM_PINNED|BM_NO_BACKING_STORE)));
        free(bo_fake->backing_store);
        bo_fake->backing_store = NULL;
    }
}

static void
set_dirty(drm_intel_bo *bo)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bo->bufmgr;
    drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *)bo;

    if (bo_fake->flags & BM_NO_BACKING_STORE && bo_fake->invalidate_cb != NULL)
        bo_fake->invalidate_cb(bo, bo_fake->invalidate_ptr);

    assert(!(bo_fake->flags & BM_PINNED));

    DBG("set_dirty - buf %d\n", bo_fake->id);
    bo_fake->dirty = 1;
}

static int
evict_lru(drm_intel_bufmgr_fake *bufmgr_fake, unsigned int max_fence)
{
    struct block *block, *tmp;

    DBG("%s\n", __FUNCTION__);

    DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->lru) {
        drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *)block->bo;

        if (bo_fake != NULL && (bo_fake->flags & BM_NO_FENCE_SUBDATA))
            continue;

        if (block->fence && max_fence && !FENCE_LTE(block->fence, max_fence))
            return 0;

        set_dirty(&bo_fake->bo);
        bo_fake->block = NULL;

        free_block(bufmgr_fake, block);
        return 1;
    }

    return 0;
}

static int
evict_mru(drm_intel_bufmgr_fake *bufmgr_fake)
{
    struct block *block, *tmp;

    DBG("%s\n", __FUNCTION__);

    DRMLISTFOREACHSAFEREVERSE(block, tmp, &bufmgr_fake->lru) {
        drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *)block->bo;

        if (bo_fake && (bo_fake->flags & BM_NO_FENCE_SUBDATA))
            continue;

        set_dirty(&bo_fake->bo);
        bo_fake->block = NULL;

        free_block(bufmgr_fake, block);
        return 1;
    }

    return 0;
}

/**
 * Removes all objects from the fenced list older than the given fence.
 */
static int clear_fenced(drm_intel_bufmgr_fake *bufmgr_fake,
                        unsigned int fence_cookie)
{
    struct block *block, *tmp;
    int ret = 0;

    bufmgr_fake->last_fence = fence_cookie;
    DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->fenced) {
        assert(block->fenced);

        if (_fence_test(bufmgr_fake, block->fence)) {

            block->fenced = 0;

            if (!block->bo) {
                DBG("delayed free: offset %x sz %x\n",
                    block->mem->ofs, block->mem->size);
                DRMLISTDEL(block);
                mmFreeMem(block->mem);
                free(block);
            }
            else {
                DBG("return to lru: offset %x sz %x\n",
                    block->mem->ofs, block->mem->size);
                DRMLISTDEL(block);
                DRMLISTADDTAIL(block, &bufmgr_fake->lru);
            }

            ret = 1;
        }
        else {
            /* Blocks are ordered by fence, so if one fails, all from
             * here will fail also:
             */
            DBG("fence not passed: offset %x sz %x %d %d \n",
                block->mem->ofs, block->mem->size, block->fence,
                bufmgr_fake->last_fence);
            break;
        }
    }

    DBG("%s: %d\n", __FUNCTION__, ret);
    return ret;
}

static void fence_blocks(drm_intel_bufmgr_fake *bufmgr_fake, unsigned fence)
{
    struct block *block, *tmp;

    DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->on_hardware) {
        DBG("Fence block %p (sz 0x%x ofs %x buf %p) with fence %d\n", block,
            block->mem->size, block->mem->ofs, block->bo, fence);
        block->fence = fence;

        block->on_hardware = 0;
        block->fenced = 1;

        /* Move to tail of pending list here
         */
        DRMLISTDEL(block);
        DRMLISTADDTAIL(block, &bufmgr_fake->fenced);
    }

    assert(DRMLISTEMPTY(&bufmgr_fake->on_hardware));
}

static int evict_and_alloc_block(drm_intel_bo *bo)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bo->bufmgr;
    drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *)bo;

    assert(bo_fake->block == NULL);

    /* Search for already free memory:
     */
    if (alloc_block(bo))
        return 1;

    /* If we're not thrashing, allow lru eviction to dig deeper into
     * recently used textures.  We'll probably be thrashing soon:
     */
    if (!bufmgr_fake->thrashing) {
        while (evict_lru(bufmgr_fake, 0))
            if (alloc_block(bo))
                return 1;
    }

    /* Keep thrashing counter alive?
     */
    if (bufmgr_fake->thrashing)
        bufmgr_fake->thrashing = 20;

    /* Wait on any already pending fences - here we are waiting for any
     * freed memory that has been submitted to hardware and fenced to
     * become available:
     */
    while (!DRMLISTEMPTY(&bufmgr_fake->fenced)) {
        uint32_t fence = bufmgr_fake->fenced.next->fence;
        _fence_wait_internal(bufmgr_fake, fence);

        if (alloc_block(bo))
            return 1;
    }

    if (!DRMLISTEMPTY(&bufmgr_fake->on_hardware)) {
        while (!DRMLISTEMPTY(&bufmgr_fake->fenced)) {
            uint32_t fence = bufmgr_fake->fenced.next->fence;
            _fence_wait_internal(bufmgr_fake, fence);
        }

        if (!bufmgr_fake->thrashing) {
            DBG("thrashing\n");
        }
        bufmgr_fake->thrashing = 20;

        if (alloc_block(bo))
            return 1;
    }

    while (evict_mru(bufmgr_fake))
        if (alloc_block(bo))
            return 1;

    DBG("%s 0x%x bytes failed\n", __FUNCTION__, bo->size);

    return 0;
}

/***********************************************************************
 * Public functions
 */

/**
 * Wait for hardware idle by emitting a fence and waiting for it.
 */
static void
drm_intel_bufmgr_fake_wait_idle(drm_intel_bufmgr_fake *bufmgr_fake)
{
    unsigned int cookie;

    cookie = _fence_emit_internal(bufmgr_fake);
    _fence_wait_internal(bufmgr_fake, cookie);
}

/**
 * Wait for rendering to a buffer to complete.
 *
 * It is assumed that the batchbuffer which performed the rendering included
 * the necessary flushing.
 */
static void
drm_intel_fake_bo_wait_rendering_locked(drm_intel_bo *bo)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bo->bufmgr;
    drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *)bo;

    if (bo_fake->block == NULL || !bo_fake->block->fenced)
        return;

    _fence_wait_internal(bufmgr_fake, bo_fake->block->fence);
}

static void
drm_intel_fake_bo_wait_rendering(drm_intel_bo *bo)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bo->bufmgr;

    pthread_mutex_lock(&bufmgr_fake->lock);
    drm_intel_fake_bo_wait_rendering_locked(bo);
    pthread_mutex_unlock(&bufmgr_fake->lock);
}

/* Specifically ignore texture memory sharing.
 *  -- just evict everything
 *  -- and wait for idle
 */
void
drm_intel_bufmgr_fake_contended_lock_take(drm_intel_bufmgr *bufmgr)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bufmgr;
    struct block *block, *tmp;

    pthread_mutex_lock(&bufmgr_fake->lock);

    bufmgr_fake->need_fence = 1;
    bufmgr_fake->fail = 0;

    /* Wait for hardware idle.  We don't know where acceleration has been
     * happening, so we'll need to wait anyway before letting anything get
     * put on the card again.
     */
    drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);

    /* Check that we hadn't released the lock without having fenced the last
     * set of buffers.
     */
    assert(DRMLISTEMPTY(&bufmgr_fake->fenced));
    assert(DRMLISTEMPTY(&bufmgr_fake->on_hardware));

    DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->lru) {
        assert(_fence_test(bufmgr_fake, block->fence));
        set_dirty(block->bo);
    }

    pthread_mutex_unlock(&bufmgr_fake->lock);
}

static drm_intel_bo *
drm_intel_fake_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
                        unsigned long size, unsigned int alignment)
{
    drm_intel_bufmgr_fake *bufmgr_fake;
    drm_intel_bo_fake *bo_fake;

    bufmgr_fake = (drm_intel_bufmgr_fake *)bufmgr;

    assert(size != 0);

    bo_fake = calloc(1, sizeof(*bo_fake));
    if (!bo_fake)
        return NULL;

    bo_fake->bo.size = size;
    bo_fake->bo.offset = -1;
    bo_fake->bo.virtual = NULL;
    bo_fake->bo.bufmgr = bufmgr;
    bo_fake->refcount = 1;

    /* Alignment must be a power of two (a power of two has exactly one bit
     * set, so x & (x - 1) clears it and leaves zero).
     */
    assert((alignment & (alignment - 1)) == 0);
    if (alignment == 0)
        alignment = 1;
    bo_fake->alignment = alignment;
    bo_fake->id = ++bufmgr_fake->buf_nr;
    bo_fake->name = name;
    bo_fake->flags = 0;
    bo_fake->is_static = 0;

    DBG("drm_bo_alloc: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name,
        bo_fake->bo.size / 1024);

    return &bo_fake->bo;
}

drm_intel_bo *
drm_intel_bo_fake_alloc_static(drm_intel_bufmgr *bufmgr, const char *name,
                               unsigned long offset, unsigned long size,
                               void *virtual)
{
    drm_intel_bufmgr_fake *bufmgr_fake;
    drm_intel_bo_fake *bo_fake;

    bufmgr_fake = (drm_intel_bufmgr_fake *)bufmgr;

    assert(size != 0);

    bo_fake = calloc(1, sizeof(*bo_fake));
    if (!bo_fake)
        return NULL;

    bo_fake->bo.size = size;
    bo_fake->bo.offset = offset;
    bo_fake->bo.virtual = virtual;
    bo_fake->bo.bufmgr = bufmgr;
    bo_fake->refcount = 1;
    bo_fake->id = ++bufmgr_fake->buf_nr;
    bo_fake->name = name;
    bo_fake->flags = BM_PINNED;
    bo_fake->is_static = 1;

    DBG("drm_bo_alloc_static: (buf %d: %s, %d kb)\n", bo_fake->id,
        bo_fake->name, bo_fake->bo.size / 1024);

    return &bo_fake->bo;
}

static void
drm_intel_fake_bo_reference(drm_intel_bo *bo)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bo->bufmgr;
    drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *)bo;

    pthread_mutex_lock(&bufmgr_fake->lock);
    bo_fake->refcount++;
    pthread_mutex_unlock(&bufmgr_fake->lock);
}

static void
drm_intel_fake_bo_reference_locked(drm_intel_bo *bo)
{
    drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *)bo;

    bo_fake->refcount++;
}

static void
drm_intel_fake_bo_unreference_locked(drm_intel_bo *bo)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bo->bufmgr;
    drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *)bo;
    int i;

    if (--bo_fake->refcount == 0) {
        assert(bo_fake->map_count == 0);
        /* No remaining references, so free it */
        if (bo_fake->block)
            free_block(bufmgr_fake, bo_fake->block);
        free_backing_store(bo);

        for (i = 0; i < bo_fake->nr_relocs; i++)
            drm_intel_fake_bo_unreference_locked(bo_fake->relocs[i].target_buf);

        DBG("drm_bo_unreference: free buf %d %s\n", bo_fake->id,
            bo_fake->name);

        free(bo_fake->relocs);
        free(bo);
    }
}

static void
drm_intel_fake_bo_unreference(drm_intel_bo *bo)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bo->bufmgr;

    pthread_mutex_lock(&bufmgr_fake->lock);
    drm_intel_fake_bo_unreference_locked(bo);
    pthread_mutex_unlock(&bufmgr_fake->lock);
}

/**
 * Set the buffer as not requiring backing store, and instead get the callback
 * invoked whenever it would be set dirty.
 */
void drm_intel_bo_fake_disable_backing_store(drm_intel_bo *bo,
                                             void (*invalidate_cb)(drm_intel_bo *bo,
                                                                   void *ptr),
                                             void *ptr)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bo->bufmgr;
    drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *)bo;

    pthread_mutex_lock(&bufmgr_fake->lock);

    if (bo_fake->backing_store)
        free_backing_store(bo);

    bo_fake->flags |= BM_NO_BACKING_STORE;

    DBG("disable_backing_store set buf %d dirty\n", bo_fake->id);
    bo_fake->dirty = 1;
    bo_fake->invalidate_cb = invalidate_cb;
    bo_fake->invalidate_ptr = ptr;

    /* Note that it is invalid right from the start.  Also note
     * invalidate_cb is called with the bufmgr locked, so cannot
     * itself make bufmgr calls.
     */
    if (invalidate_cb != NULL)
        invalidate_cb(bo, ptr);

    pthread_mutex_unlock(&bufmgr_fake->lock);
}

/**
 * Map a buffer into bo->virtual, allocating either card memory space (If
 * BM_NO_BACKING_STORE or BM_PINNED) or backing store, as necessary.
 */
static int
drm_intel_fake_bo_map_locked(drm_intel_bo *bo, int write_enable)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bo->bufmgr;
    drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *)bo;

    /* Static buffers are always mapped. */
    if (bo_fake->is_static) {
        if (bo_fake->card_dirty) {
            drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);
            bo_fake->card_dirty = 0;
        }
        return 0;
    }

    /* Allow recursive mapping.  Mesa may recursively map buffers with
     * nested display loops, and it is used internally in bufmgr_fake
     * for relocation.
     */
    if (bo_fake->map_count++ != 0)
        return 0;

    DBG("drm_bo_map: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name,
        bo_fake->bo.size / 1024);

    if (bo->virtual != NULL) {
        drmMsg("%s: already mapped\n", __FUNCTION__);
        abort();
    }
    else if (bo_fake->flags & (BM_NO_BACKING_STORE|BM_PINNED)) {
        if (!bo_fake->block && !evict_and_alloc_block(bo)) {
            DBG("%s: alloc failed\n", __FUNCTION__);
            bufmgr_fake->fail = 1;
            return 1;
        }
        else {
            assert(bo_fake->block);
            bo_fake->dirty = 0;

            if (!(bo_fake->flags & BM_NO_FENCE_SUBDATA) &&
                bo_fake->block->fenced) {
                drm_intel_fake_bo_wait_rendering_locked(bo);
            }

            bo->virtual = bo_fake->block->virtual;
        }
    }
    else {
        if (write_enable)
            set_dirty(bo);

        if (bo_fake->backing_store == 0)
            alloc_backing_store(bo);

        if ((bo_fake->card_dirty == 1) && bo_fake->block) {
            if (bo_fake->block->fenced)
                drm_intel_fake_bo_wait_rendering_locked(bo);

            memcpy(bo_fake->backing_store, bo_fake->block->virtual,
                   bo_fake->block->bo->size);
            bo_fake->card_dirty = 0;
        }

        bo->virtual = bo_fake->backing_store;
    }

    return 0;
}

static int
drm_intel_fake_bo_map(drm_intel_bo *bo, int write_enable)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bo->bufmgr;
    int ret;

    pthread_mutex_lock(&bufmgr_fake->lock);
    ret = drm_intel_fake_bo_map_locked(bo, write_enable);
    pthread_mutex_unlock(&bufmgr_fake->lock);

    return ret;
}

static int
drm_intel_fake_bo_unmap_locked(drm_intel_bo *bo)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bo->bufmgr;
    drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *)bo;

    /* Static buffers are always mapped. */
    if (bo_fake->is_static)
        return 0;

    assert(bo_fake->map_count != 0);
    if (--bo_fake->map_count != 0)
        return 0;

    DBG("drm_bo_unmap: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name,
        bo_fake->bo.size / 1024);

    bo->virtual = NULL;

    return 0;
}

static int
drm_intel_fake_bo_unmap(drm_intel_bo *bo)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bo->bufmgr;
    int ret;

    pthread_mutex_lock(&bufmgr_fake->lock);
    ret = drm_intel_fake_bo_unmap_locked(bo);
    pthread_mutex_unlock(&bufmgr_fake->lock);

    return ret;
}

static void
drm_intel_fake_kick_all_locked(drm_intel_bufmgr_fake *bufmgr_fake)
{
    struct block *block, *tmp;

    bufmgr_fake->performed_rendering = 0;
    /* okay, for every BO that is on the HW kick it off.
       seriously not afraid of the POLICE right now */
    DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->on_hardware) {
        drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *)block->bo;

        block->on_hardware = 0;
        free_block(bufmgr_fake, block);
        bo_fake->block = NULL;
        bo_fake->validated = 0;
        if (!(bo_fake->flags & BM_NO_BACKING_STORE))
            bo_fake->dirty = 1;
    }
}

static int
drm_intel_fake_bo_validate(drm_intel_bo *bo)
{
    drm_intel_bufmgr_fake *bufmgr_fake;
    drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *)bo;

    bufmgr_fake = (drm_intel_bufmgr_fake *)bo->bufmgr;

    DBG("drm_bo_validate: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name,
        bo_fake->bo.size / 1024);

    /* Sanity check: Buffers should be unmapped before being validated.
     * This is not so much of a problem for bufmgr_fake, but TTM refuses,
     * and the problem is harder to debug there.
     */
    assert(bo_fake->map_count == 0);

    if (bo_fake->is_static) {
        /* Add it to the needs-fence list */
        bufmgr_fake->need_fence = 1;
        return 0;
    }

    /* Allocate the card memory */
    if (!bo_fake->block && !evict_and_alloc_block(bo)) {
        bufmgr_fake->fail = 1;
        DBG("Failed to validate buf %d:%s\n", bo_fake->id, bo_fake->name);
        return -1;
    }

    assert(bo_fake->block);
    assert(bo_fake->block->bo == &bo_fake->bo);

    bo->offset = bo_fake->block->mem->ofs;

    /* Upload the buffer contents if necessary */
    if (bo_fake->dirty) {
        DBG("Upload dirty buf %d:%s, sz %d offset 0x%x\n", bo_fake->id,
            bo_fake->name, bo->size, bo_fake->block->mem->ofs);

        assert(!(bo_fake->flags &
                 (BM_NO_BACKING_STORE|BM_PINNED)));

        /* Actually, should be able to just wait for a fence on the memory,
         * which we would be tracking when we free it.  Waiting for idle is
         * a sufficiently large hammer for now.
         */
        drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);

        /* we may never have mapped this BO so it might not have any backing
         * store; if this happens it should be rare, but 0 the card memory
         * in any case */
        if (bo_fake->backing_store)
            memcpy(bo_fake->block->virtual, bo_fake->backing_store, bo->size);
        else
            memset(bo_fake->block->virtual, 0, bo->size);

        bo_fake->dirty = 0;
    }

    bo_fake->block->fenced = 0;
    bo_fake->block->on_hardware = 1;
    DRMLISTDEL(bo_fake->block);
    DRMLISTADDTAIL(bo_fake->block, &bufmgr_fake->on_hardware);

    bo_fake->validated = 1;
    bufmgr_fake->need_fence = 1;

    return 0;
}

static void
drm_intel_fake_fence_validated(drm_intel_bufmgr *bufmgr)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bufmgr;
    unsigned int cookie;

    cookie = _fence_emit_internal(bufmgr_fake);
    fence_blocks(bufmgr_fake, cookie);

    DBG("drm_fence_validated: 0x%08x cookie\n", cookie);
}

static void
drm_intel_fake_destroy(drm_intel_bufmgr *bufmgr)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bufmgr;

    pthread_mutex_destroy(&bufmgr_fake->lock);
    mmDestroy(bufmgr_fake->heap);
    free(bufmgr);
}

static int
drm_intel_fake_emit_reloc(drm_intel_bo *bo, uint32_t offset,
                          drm_intel_bo *target_bo, uint32_t target_offset,
                          uint32_t read_domains, uint32_t write_domain)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bo->bufmgr;
    struct fake_buffer_reloc *r;
    drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *)bo;
    drm_intel_bo_fake *target_fake = (drm_intel_bo_fake *)target_bo;
    int i;

    pthread_mutex_lock(&bufmgr_fake->lock);

    assert(bo);
    assert(target_bo);

    if (bo_fake->relocs == NULL) {
        bo_fake->relocs = malloc(sizeof(struct fake_buffer_reloc) * MAX_RELOCS);
    }

    r = &bo_fake->relocs[bo_fake->nr_relocs++];

    assert(bo_fake->nr_relocs <= MAX_RELOCS);

    drm_intel_fake_bo_reference_locked(target_bo);

    if (!target_fake->is_static) {
        bo_fake->child_size += ALIGN(target_bo->size, target_fake->alignment);
        bo_fake->child_size += target_fake->child_size;
    }
    r->target_buf = target_bo;
    r->offset = offset;
    r->last_target_offset = target_bo->offset;
    r->delta = target_offset;
    r->read_domains = read_domains;
    r->write_domain = write_domain;

    if (bufmgr_fake->debug) {
        /* Check that a conflicting relocation hasn't already been emitted. */
        for (i = 0; i < bo_fake->nr_relocs - 1; i++) {
            struct fake_buffer_reloc *r2 = &bo_fake->relocs[i];

            assert(r->offset != r2->offset);
        }
    }

    pthread_mutex_unlock(&bufmgr_fake->lock);

    return 0;
}

/**
 * Incorporates the validation flags associated with each relocation into
 * the combined validation flags for the buffer on this batchbuffer submission.
 */
static void
drm_intel_fake_calculate_domains(drm_intel_bo *bo)
{
    drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *)bo;
    int i;

    for (i = 0; i < bo_fake->nr_relocs; i++) {
        struct fake_buffer_reloc *r = &bo_fake->relocs[i];
        drm_intel_bo_fake *target_fake = (drm_intel_bo_fake *)r->target_buf;

        /* Do the same for the tree of buffers we depend on */
        drm_intel_fake_calculate_domains(r->target_buf);

        target_fake->read_domains |= r->read_domains;
        target_fake->write_domain |= r->write_domain;
    }
}

static int
drm_intel_fake_reloc_and_validate_buffer(drm_intel_bo *bo)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bo->bufmgr;
    drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *)bo;
    int i, ret;

    assert(bo_fake->map_count == 0);

    for (i = 0; i < bo_fake->nr_relocs; i++) {
        struct fake_buffer_reloc *r = &bo_fake->relocs[i];
        drm_intel_bo_fake *target_fake = (drm_intel_bo_fake *)r->target_buf;
        uint32_t reloc_data;

        /* Validate the target buffer if that hasn't been done. */
        if (!target_fake->validated) {
            ret = drm_intel_fake_reloc_and_validate_buffer(r->target_buf);
            if (ret != 0) {
                if (bo->virtual != NULL)
                    drm_intel_fake_bo_unmap_locked(bo);
                return ret;
            }
        }

        /* Calculate the value of the relocation entry. */
        if (r->target_buf->offset != r->last_target_offset) {
            reloc_data = r->target_buf->offset + r->delta;

            if (bo->virtual == NULL)
                drm_intel_fake_bo_map_locked(bo, 1);

            *(uint32_t *)((uint8_t *)bo->virtual + r->offset) = reloc_data;

            r->last_target_offset = r->target_buf->offset;
        }
    }

    if (bo->virtual != NULL)
        drm_intel_fake_bo_unmap_locked(bo);

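    /* If the command stream writes to this BO, the copy in card memory will
     * become the authoritative one, so make sure backing store exists to
     * copy back into on eviction, and note that rendering was performed.
     */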
    if (bo_fake->write_domain != 0) {
        if (!(bo_fake->flags & (BM_NO_BACKING_STORE|BM_PINNED))) {
            if (bo_fake->backing_store == 0)
                alloc_backing_store(bo);
        }
        bo_fake->card_dirty = 1;
        bufmgr_fake->performed_rendering = 1;
    }

    return drm_intel_fake_bo_validate(bo);
}

static void
drm_intel_bo_fake_post_submit(drm_intel_bo *bo)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bo->bufmgr;
    drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *)bo;
    int i;

    for (i = 0; i < bo_fake->nr_relocs; i++) {
        struct fake_buffer_reloc *r = &bo_fake->relocs[i];
        drm_intel_bo_fake *target_fake = (drm_intel_bo_fake *)r->target_buf;

        if (target_fake->validated)
            drm_intel_bo_fake_post_submit(r->target_buf);

        DBG("%s@0x%08x + 0x%08x -> %s@0x%08x + 0x%08x\n",
            bo_fake->name, (uint32_t)bo->offset, r->offset,
            target_fake->name, (uint32_t)r->target_buf->offset, r->delta);
    }

    assert(bo_fake->map_count == 0);
    bo_fake->validated = 0;
    bo_fake->read_domains = 0;
    bo_fake->write_domain = 0;
}

void drm_intel_bufmgr_fake_set_exec_callback(drm_intel_bufmgr *bufmgr,
                                             int (*exec)(drm_intel_bo *bo,
                                                         unsigned int used,
                                                         void *priv),
                                             void *priv)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bufmgr;

    bufmgr_fake->exec = exec;
    bufmgr_fake->exec_priv = priv;
}

static int
drm_intel_fake_bo_exec(drm_intel_bo *bo, int used,
                       drm_clip_rect_t *cliprects, int num_cliprects,
                       int DR4)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bo->bufmgr;
    drm_intel_bo_fake *batch_fake = (drm_intel_bo_fake *)bo;
    struct drm_i915_batchbuffer batch;
    int ret;
    int retry_count = 0;

    pthread_mutex_lock(&bufmgr_fake->lock);

    bufmgr_fake->performed_rendering = 0;

    drm_intel_fake_calculate_domains(bo);

    batch_fake->read_domains = I915_GEM_DOMAIN_COMMAND;

    /* we've run out of RAM so blow the whole lot away and retry */
restart:
    ret = drm_intel_fake_reloc_and_validate_buffer(bo);
    if (bufmgr_fake->fail == 1) {
        if (retry_count == 0) {
            retry_count++;
            drm_intel_fake_kick_all_locked(bufmgr_fake);
            bufmgr_fake->fail = 0;
            goto restart;
        } else /* dump out the memory here */
            mmDumpMemInfo(bufmgr_fake->heap);
    }

    assert(ret == 0);

    if (bufmgr_fake->exec != NULL) {
        int ret = bufmgr_fake->exec(bo, used, bufmgr_fake->exec_priv);
        if (ret != 0) {
            pthread_mutex_unlock(&bufmgr_fake->lock);
            return ret;
        }
    } else {
        batch.start = bo->offset;
        batch.used = used;
        batch.cliprects = cliprects;
        batch.num_cliprects = num_cliprects;
        batch.DR1 = 0;
        batch.DR4 = DR4;

        if (drmCommandWrite(bufmgr_fake->fd, DRM_I915_BATCHBUFFER, &batch,
                            sizeof(batch))) {
            drmMsg("DRM_I915_BATCHBUFFER: %d\n", -errno);
            pthread_mutex_unlock(&bufmgr_fake->lock);
            return -errno;
        }
    }

    drm_intel_fake_fence_validated(bo->bufmgr);

    drm_intel_bo_fake_post_submit(bo);

    pthread_mutex_unlock(&bufmgr_fake->lock);

    return 0;
}

/**
 * Return an error if the list of BOs will exceed the aperture size.
 *
 * This is a rough guess and likely to fail, as during the validate sequence we
 * may place a buffer in an inopportune spot early on and then fail to fit
 * a set smaller than the aperture.
 */
static int
drm_intel_fake_check_aperture_space(drm_intel_bo **bo_array, int count)
{
    drm_intel_bufmgr_fake *bufmgr_fake =
        (drm_intel_bufmgr_fake *)bo_array[0]->bufmgr;
    unsigned int sz = 0;
    int i;

    for (i = 0; i < count; i++) {
        drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *)bo_array[i];

        if (bo_fake == NULL)
            continue;

        if (!bo_fake->is_static)
            sz += ALIGN(bo_array[i]->size, bo_fake->alignment);
        sz += bo_fake->child_size;
    }

    if (sz > bufmgr_fake->size) {
        DBG("check_space: overflowed bufmgr size, %dkb vs %dkb\n",
            sz / 1024, bufmgr_fake->size / 1024);
        return -1;
    }

    DBG("drm_check_space: sz %dkb vs bufmgr %dkb\n", sz / 1024,
        bufmgr_fake->size / 1024);
    return 0;
}

/**
 * Evicts all buffers, waiting for fences to pass and copying contents out
 * as necessary.
 *
 * Used by the X Server on LeaveVT, when the card memory is no longer our
 * own.
 */
void
drm_intel_bufmgr_fake_evict_all(drm_intel_bufmgr *bufmgr)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bufmgr;
    struct block *block, *tmp;

    pthread_mutex_lock(&bufmgr_fake->lock);

    bufmgr_fake->need_fence = 1;
    bufmgr_fake->fail = 0;

    /* Wait for hardware idle.  We don't know where acceleration has been
     * happening, so we'll need to wait anyway before letting anything get
     * put on the card again.
     */
    drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);

    /* Check that we hadn't released the lock without having fenced the last
     * set of buffers.
     */
    assert(DRMLISTEMPTY(&bufmgr_fake->fenced));
    assert(DRMLISTEMPTY(&bufmgr_fake->on_hardware));

    DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->lru) {
        /* Releases the memory, and memcpys dirty contents out if necessary. */
        free_block(bufmgr_fake, block);
    }

    pthread_mutex_unlock(&bufmgr_fake->lock);
}

void drm_intel_bufmgr_fake_set_last_dispatch(drm_intel_bufmgr *bufmgr,
                                             volatile unsigned int *last_dispatch)
{
    drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *)bufmgr;

    bufmgr_fake->last_dispatch = (volatile int *)last_dispatch;
}

drm_intel_bufmgr *
drm_intel_bufmgr_fake_init(int fd,
                           unsigned long low_offset, void *low_virtual,
                           unsigned long size,
                           volatile unsigned int *last_dispatch)
{
    drm_intel_bufmgr_fake *bufmgr_fake;

    bufmgr_fake = calloc(1, sizeof(*bufmgr_fake));
    if (bufmgr_fake == NULL)
        return NULL;

    if (pthread_mutex_init(&bufmgr_fake->lock, NULL) != 0) {
        free(bufmgr_fake);
        return NULL;
    }

    /* Initialize allocator */
    DRMINITLISTHEAD(&bufmgr_fake->fenced);
    DRMINITLISTHEAD(&bufmgr_fake->on_hardware);
    DRMINITLISTHEAD(&bufmgr_fake->lru);

    bufmgr_fake->low_offset = low_offset;
    bufmgr_fake->virtual = low_virtual;
    bufmgr_fake->size = size;
    bufmgr_fake->heap = mmInit(low_offset, size);

    /* Hook in methods */
    bufmgr_fake->bufmgr.bo_alloc = drm_intel_fake_bo_alloc;
    bufmgr_fake->bufmgr.bo_reference = drm_intel_fake_bo_reference;
    bufmgr_fake->bufmgr.bo_unreference = drm_intel_fake_bo_unreference;
    bufmgr_fake->bufmgr.bo_map = drm_intel_fake_bo_map;
    bufmgr_fake->bufmgr.bo_unmap = drm_intel_fake_bo_unmap;
    bufmgr_fake->bufmgr.bo_wait_rendering = drm_intel_fake_bo_wait_rendering;
    bufmgr_fake->bufmgr.bo_emit_reloc = drm_intel_fake_emit_reloc;
    bufmgr_fake->bufmgr.destroy = drm_intel_fake_destroy;
    bufmgr_fake->bufmgr.bo_exec = drm_intel_fake_bo_exec;
    bufmgr_fake->bufmgr.check_aperture_space = drm_intel_fake_check_aperture_space;
    bufmgr_fake->bufmgr.debug = 0;

    bufmgr_fake->fd = fd;
    bufmgr_fake->last_dispatch = (volatile int *)last_dispatch;

    return &bufmgr_fake->bufmgr;
}
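
/* Usage sketch (not from any particular driver; the aperture parameters,
 * sarea pointer, and my_* callbacks below are hypothetical):
 *
 *    drm_intel_bufmgr *bufmgr;
 *
 *    bufmgr = drm_intel_bufmgr_fake_init(fd, aper_offset, aper_virtual,
 *                                        aper_size, sarea_last_dispatch);
 *
 *    // Optionally replace the legacy DRM_I915_IRQ_* and DRM_I915_BATCHBUFFER
 *    // paths with driver-provided hooks:
 *    drm_intel_bufmgr_fake_set_fence_callback(bufmgr, my_emit, my_wait,
 *                                             my_priv);
 *    drm_intel_bufmgr_fake_set_exec_callback(bufmgr, my_exec, my_priv);
 */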