OSDN Git Service

[intel-GEM] Add memory domain support.
authorKeith Packard <keithp@keithp.com>
Wed, 7 May 2008 03:00:23 +0000 (20:00 -0700)
committerKeith Packard <keithp@keithp.com>
Wed, 7 May 2008 03:00:23 +0000 (20:00 -0700)
Memory domains allow the kernel to track which caches to flush and how to
move objects before buffer execution.

linux-core/drm_gem.c
linux-core/i915_gem.c

index 929c008..e2272f2 100644 (file)
@@ -325,6 +325,10 @@ drm_gem_mmap_ioctl(struct drm_device *dev, void *data,
        drm_gem_object_unreference(obj);
        if (IS_ERR((void *)addr))
                return (int) addr;
+       
+       /* XXX hack until we have a driver callback to make this work */
+       obj->read_domains = DRM_GEM_DOMAIN_CPU;
+       obj->write_domain = DRM_GEM_DOMAIN_CPU;
 
        args->addr_ptr = (uint64_t) addr;
 
@@ -540,40 +544,3 @@ drm_gem_object_handle_free (struct kref *kref)
 }
 EXPORT_SYMBOL(drm_gem_object_handle_free);
 
-/*
- * Set the next domain for the specified object. This
- * may not actually perform the necessary flushing/invaliding though,
- * as that may want to be batched with other set_domain operations
- */
-int drm_gem_object_set_domain (struct drm_gem_object *obj,
-                              uint32_t read_domains,
-                              uint32_t write_domain)
-{
-       struct drm_device       *dev = obj->dev;
-       uint32_t                invalidate_domains = 0;
-       uint32_t                flush_domains = 0;
-       
-       /*
-        * Flush the current write domain if
-        * the new read domains don't match. Invalidate
-        * any read domains which differ from the old
-        * write domain
-        */
-       if (obj->write_domain && obj->write_domain != read_domains)
-       {
-               flush_domains |= obj->write_domain;
-               invalidate_domains |= read_domains & ~obj->write_domain;
-       }
-       /*
-        * Invalidate any read caches which may have
-        * stale data. That is, any new read domains.
-        */
-       invalidate_domains |= read_domains & ~obj->read_domains;
-       obj->write_domain = write_domain;
-       obj->read_domain = read_domains;
-       if ((flush_domains | invalidate_domains) & DRM_GEM_DOMAIN_CPU)
-               drm_gem_object_clflush (obj);
-       dev->invalidate_domains |= invalidate_domains & ~DRM_GEM_DOMAIN_CPU;
-       dev->flush_domains |= flush_domains & ~DRM_GEM_DOMAIN_CPU;
-}
-EXPORT_SYMBOL(drm_gem_object_set_domain);
index d630b0b..911f9aa 100644 (file)
@@ -30,8 +30,8 @@
 #include "i915_drm.h"
 #include "i915_drv.h"
 
-#define WATCH_BUF  0
-#define WATCH_EXEC 0
+#define WATCH_BUF  1
+#define WATCH_EXEC 1
 
 int
 i915_gem_init_ioctl(struct drm_device *dev, void *data,
@@ -73,9 +73,33 @@ i915_gem_object_free_page_list(struct drm_gem_object *obj)
        obj_priv->page_list = NULL;
 }
 
+static void
+i915_gem_flush(struct drm_device *dev, uint32_t domains)
+{ /* Emit one MI_FLUSH command on the LP ring, with flags chosen from the DRM_GEM_DOMAIN_I915_* bits in domains. */
+       drm_i915_private_t *dev_priv = dev->dev_private;
+       uint32_t        cmd; /* flush command dword written to the ring */
+       RING_LOCALS;
+
+#if WATCH_EXEC
+       DRM_INFO ("%s: flush %08x\n", __FUNCTION__, domains);
+#endif
+       cmd = CMD_MI_FLUSH | MI_NO_WRITE_FLUSH; /* start with the no-write-flush bit set */
+       if (domains & DRM_GEM_DOMAIN_I915_RENDER)
+               cmd &= ~MI_NO_WRITE_FLUSH; /* render domain requested: clear the no-write-flush bit */
+       if (domains & DRM_GEM_DOMAIN_I915_SAMPLER)
+               cmd |= MI_READ_FLUSH;
+       if (domains & DRM_GEM_DOMAIN_I915_INSTRUCTION)
+               cmd |= MI_EXE_FLUSH;
+       
+       BEGIN_LP_RING(2); /* two dwords follow: the flush command and a noop */
+       OUT_RING(cmd);
+       OUT_RING(0); /* noop */
+       ADVANCE_LP_RING();
+}
+
 /**
  * Ensures that all rendering to the object has completed and the object is
- * safe to unbind from the GTT.
+ * safe to unbind from the GTT or access from the CPU.
  */
 static int
 i915_gem_object_wait_rendering(struct drm_gem_object *obj)
@@ -84,10 +108,27 @@ i915_gem_object_wait_rendering(struct drm_gem_object *obj)
        struct drm_i915_gem_object *obj_priv = obj->driver_private;
        int ret;
 
+       /* If there are writes queued to the buffer, flush and
+        * create a new cookie to wait for.
+        */
+       if (obj->write_domain & ~(DRM_GEM_DOMAIN_CPU))
+       {
+#if WATCH_BUF
+               DRM_INFO ("%s: flushing object %p from write domain %08x\n",
+                         __FUNCTION__, obj, obj->write_domain);
+#endif
+               i915_gem_flush (dev, obj->write_domain);
+               obj->write_domain = 0;
+               obj_priv->last_rendering_cookie = i915_emit_irq (dev);
+       }
        /* If there is rendering queued on the buffer being evicted, wait for
         * it.
         */
        if (obj_priv->last_rendering_cookie != 0) {
+#if WATCH_BUF
+               DRM_INFO ("%s: object %p wait for cookie %08x\n",
+                         __FUNCTION__, obj, obj_priv->last_rendering_cookie);
+#endif
                ret = i915_wait_irq(dev, obj_priv->last_rendering_cookie);
                if (ret != 0)
                        return ret;
@@ -125,6 +166,7 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
        drm_memrange_put_block(obj_priv->gtt_space);
        obj_priv->gtt_space = NULL;
        list_del_init(&obj_priv->gtt_lru_entry);
+       drm_gem_object_unreference (obj);
 }
 
 #if 0
@@ -277,10 +319,6 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
                unlock_page (obj_priv->page_list[i]);
        }
        
-       drm_ttm_cache_flush (obj_priv->page_list, page_count);
-       DRM_MEMORYBARRIER();
-       drm_agp_chipset_flush(dev);
-
        /* Create an AGP memory structure pointing at our pages, and bind it
         * into the GTT.
         */
@@ -298,6 +336,73 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
        return 0;
 }
 
+static void
+i915_gem_clflush_object (struct drm_gem_object *obj)
+{ /* Run the cache flush over obj's page list, then the AGP chipset flush for its device. */
+       struct drm_device               *dev = obj->dev;
+       struct drm_i915_gem_object      *obj_priv = obj->driver_private;
+
+       drm_ttm_cache_flush (obj_priv->page_list, obj->size / PAGE_SIZE); /* page count truncates; presumably obj->size is a PAGE_SIZE multiple -- TODO confirm */
+       drm_agp_chipset_flush(dev);
+}
+       
+/*
+ * Move obj to the requested read/write domains.  CPU-domain work (an optional
+ * wait for rendering, then a clflush) is performed here; non-CPU flush and
+ * invalidate bits are only accumulated on dev, to be batched with other calls.
+ */
+static void
+i915_gem_object_set_domain (struct drm_gem_object *obj,
+                           uint32_t read_domains,
+                           uint32_t write_domain)
+{
+       struct drm_device               *dev = obj->dev;
+       uint32_t                        invalidate_domains = 0;
+       uint32_t                        flush_domains = 0;
+       
+#if WATCH_BUF
+       DRM_INFO ("%s: object %p read %08x write %08x\n",
+                 __FUNCTION__, obj, read_domains, write_domain);
+#endif
+       /*
+        * Flush the current write domain if
+        * the new read domains don't match. Invalidate
+        * any read domains which differ from the old
+        * write domain
+        */
+       if (obj->write_domain && obj->write_domain != read_domains)
+       {
+               flush_domains |= obj->write_domain;
+               invalidate_domains |= read_domains & ~obj->write_domain;
+       }
+       /*
+        * Invalidate any read caches which may have
+        * stale data. That is, any new read domains.
+        */
+       invalidate_domains |= read_domains & ~obj->read_domains;
+       if ((flush_domains | invalidate_domains) & DRM_GEM_DOMAIN_CPU)
+       {
+#if WATCH_BUF
+               DRM_INFO ("%s: CPU domain flush %08x invalidate %08x\n",
+                         __FUNCTION__, flush_domains, invalidate_domains);
+#endif
+               /*
+                * If we're invalidating the CPU cache and flushing a GPU cache,
+                * then pause for rendering so that the GPU caches will be
+                * flushed before the cpu cache is invalidated
+                */
+               if ((invalidate_domains & DRM_GEM_DOMAIN_CPU) &&
+                   (flush_domains & ~DRM_GEM_DOMAIN_CPU))
+                       i915_gem_object_wait_rendering (obj); /* NOTE(review): the int result is ignored here */
+               i915_gem_clflush_object (obj);
+       }
+
+       obj->write_domain = write_domain; /* record the new domains on the object */
+       obj->read_domains = read_domains;
+       dev->invalidate_domains |= invalidate_domains & ~DRM_GEM_DOMAIN_CPU; /* CPU bit masked out: it was handled above */
+       dev->flush_domains |= flush_domains & ~DRM_GEM_DOMAIN_CPU;
+}
+
 static int
 i915_gem_reloc_and_validate_object(struct drm_gem_object *obj,
                                   struct drm_file *file_priv,
@@ -318,12 +423,17 @@ i915_gem_reloc_and_validate_object(struct drm_gem_object *obj,
                if (obj_priv->gtt_space == NULL)
                        return -ENOMEM;
        }
+
+       /* Do domain migration */
+       i915_gem_object_set_domain (obj, entry->read_domains, entry->write_domain);
+
        entry->buffer_offset = obj_priv->gtt_offset;
 
        if (obj_priv->pin_count == 0) {
                /* Move our buffer to the head of the LRU. */
                list_del_init(&obj_priv->gtt_lru_entry);
                list_add(&obj_priv->gtt_lru_entry, &dev_priv->mm.gtt_lru);
+               drm_gem_object_reference (obj);
        }
 
        relocs = (struct drm_i915_gem_relocation_entry __user *) (uintptr_t) entry->relocs_ptr;
@@ -413,18 +523,6 @@ i915_gem_reloc_and_validate_object(struct drm_gem_object *obj,
        return 0;
 }
 
-static void
-i915_gem_flush(struct drm_device *dev)
-{
-       drm_i915_private_t *dev_priv = dev->dev_private;
-       RING_LOCALS;
-
-       BEGIN_LP_RING(2);
-       OUT_RING(CMD_MI_FLUSH | MI_READ_FLUSH | MI_EXE_FLUSH);
-       OUT_RING(0); /* noop */
-       ADVANCE_LP_RING();
-}
-
 static int
 i915_dispatch_gem_execbuffer (struct drm_device * dev,
                              struct drm_i915_gem_execbuffer * exec,
@@ -556,6 +654,17 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
                }
        }
 
+       if (dev->invalidate_domains | dev->flush_domains)
+       {
+#if WATCH_EXEC
+               DRM_INFO ("%s: invalidate_domains %08x flush_domains %08x\n",
+                         __FUNCTION__, dev->invalidate_domains, dev->flush_domains);
+#endif
+               i915_gem_flush (dev, dev->invalidate_domains | dev->flush_domains);
+               dev->invalidate_domains = 0;
+               dev->flush_domains = 0;
+       }
+
        exec_offset = validate_list[args->buffer_count - 1].buffer_offset;
 
        /* make sure all previous memory operations have passed */
@@ -575,14 +684,9 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
                goto err;
        }
 
-       /* Flush the rendering.  We want this flush to go away, which will
-        * require intelligent cache management.
-        */
-       i915_gem_flush(dev);
-
-       /* Get a cookie representing the flush of the current buffer, which we
+       /* Get a cookie representing the execution of the current buffer, which we
         * can wait on.  We would like to mitigate these interrupts, likely by
-        * only flushing occasionally (so that we have *some* interrupts
+        * only creating cookies occasionally (so that we have *some* interrupts
         * representing completion of buffers that we can wait on when trying
         * to clear up gtt space).
         */