From 3eab3d9eef65041952fd7b15a2eba13cb308968d Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Thu, 25 Jun 2015 11:57:56 -0700 Subject: [PATCH] drm/vmwgfx: Add command buffer support v3 Add command buffer support. Currently we don't implement preemption or fancy error handling. Tested with a couple of mesa-demos, compiz/unity and viewperf maya-03. v2: - Synchronize with pending work at command buffer manager takedown. - Add an interface to flush the current command buffer for latency-critical command batches and apply it to framebuffer dirtying. v3: - Minor fixes of definitions and typos to address reviews. - Removed new or moved branch predictor hints. Signed-off-by: Thomas Hellstrom Reviewed-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/Makefile | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c | 13 + drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c | 1315 +++++++++++++++++++++++++++++++ drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 26 + drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 40 + drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 180 ++++- drivers/gpu/drm/vmwgfx/vmwgfx_fb.c | 1 + drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c | 67 +- drivers/gpu/drm/vmwgfx/vmwgfx_irq.c | 41 +- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 4 + 10 files changed, 1656 insertions(+), 33 deletions(-) create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile index ce0ab951f507..529bc7217c72 100644 --- a/drivers/gpu/drm/vmwgfx/Makefile +++ b/drivers/gpu/drm/vmwgfx/Makefile @@ -7,6 +7,6 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \ vmwgfx_overlay.o vmwgfx_marker.o vmwgfx_gmrid_manager.o \ vmwgfx_fence.o vmwgfx_dmabuf.o vmwgfx_scrn.o vmwgfx_context.o \ vmwgfx_surface.o vmwgfx_prime.o vmwgfx_mob.o vmwgfx_shader.o \ - vmwgfx_cmdbuf_res.o \ + vmwgfx_cmdbuf_res.o vmwgfx_cmdbuf.o \ obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c index cff2bf9db9d2..3b349fd2d12d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c @@ -72,6 +72,12 @@ static struct ttm_place mob_placement_flags = { .flags = VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED }; +static struct ttm_place mob_ne_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .flags = VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT +}; + struct ttm_placement vmw_vram_placement = { .num_placement = 1, .placement = &vram_placement_flags, @@ -200,6 +206,13 @@ struct ttm_placement vmw_mob_placement = { .busy_placement = &mob_placement_flags }; +struct ttm_placement vmw_mob_ne_placement = { + .num_placement = 1, + .num_busy_placement = 1, + .placement = &mob_ne_placement_flags, + .busy_placement = &mob_ne_placement_flags +}; + struct vmw_ttm_tt { struct ttm_dma_tt dma_ttm; struct vmw_private *dev_priv; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c new file mode 100644 index 000000000000..b044bf530974 --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c @@ -0,0 +1,1315 @@ +/************************************************************************** + * + * Copyright © 2015 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "vmwgfx_drv.h"
+#include "ttm/ttm_bo_api.h"
+
+/*
+ * Size of inline command buffers. Try to make sure that a page size is a
+ * multiple of the DMA pool allocation size.
+ */
+#define VMW_CMDBUF_INLINE_ALIGN	64
+#define VMW_CMDBUF_INLINE_SIZE	(1024 - VMW_CMDBUF_INLINE_ALIGN)
+
+/**
+ * struct vmw_cmdbuf_context - Command buffer context queues
+ *
+ * @submitted: List of command buffers that have been submitted to the
+ * manager but not yet submitted to hardware.
+ * @hw_submitted: List of command buffers submitted to hardware.
+ * @preempted: List of preempted command buffers.
+ * @num_hw_submitted: Number of buffers currently being processed by hardware.
+ */
+struct vmw_cmdbuf_context {
+	struct list_head submitted;
+	struct list_head hw_submitted;
+	struct list_head preempted;
+	unsigned num_hw_submitted;
+};
+
+/**
+ * struct vmw_cmdbuf_man - Command buffer manager
+ *
+ * @cur_mutex: Mutex protecting the command buffer used for incremental small
+ * kernel command submissions, @cur.
+ * @space_mutex: Mutex to protect against starvation when we allocate
+ * main pool buffer space.
+ * @work: A struct work_struct implementing command buffer error handling.
+ * Immutable.
+ * @dev_priv: Pointer to the device private struct. Immutable.
+ * @ctx: Array of command buffer context queues. The queues and the context
+ * data are protected by @lock.
+ * @error: List of command buffers that have caused device errors.
+ * Protected by @lock.
+ * @mm: Range manager for the command buffer space. Manager allocations and
+ * frees are protected by @lock.
+ * @cmd_space: Buffer object for the command buffer space, unless we were
+ * able to make a contiguous coherent DMA memory allocation, @handle. Immutable.
+ * @map_obj: Mapping state for @cmd_space. Immutable.
+ * @map: Pointer to command buffer space. May be a mapped buffer object or
+ * a contiguous coherent DMA memory allocation. Immutable.
+ * @cur: Command buffer for small kernel command submissions. Protected by
+ * the @cur_mutex.
+ * @cur_pos: Space already used in @cur. Protected by @cur_mutex.
+ * @default_size: Default size for the @cur command buffer. Immutable.
+ * @max_hw_submitted: Max number of in-flight command buffers the device can
+ * handle. Immutable.
+ * @lock: Spinlock protecting command submission queues.
+ * @headers: Pool of DMA memory for device command buffer headers.
+ * Internal protection.
+ * @dheaders: Pool of DMA memory for device command buffer headers with trailing
+ * space for inline data. Internal protection.
+ * @tasklet: Tasklet struct for irq processing. Immutable.
+ * @alloc_queue: Wait queue for processes waiting to allocate command buffer
+ * space.
+ * @idle_queue: Wait queue for processes waiting for command buffer idle.
+ * @irq_on: Whether the process function has requested irq to be turned on.
+ * Protected by @lock.
+ * @using_mob: Whether the command buffer space is a MOB or a contiguous DMA
+ * allocation. Immutable.
+ * @has_pool: Has a large pool of DMA memory which allows larger allocations.
+ * Typically this is false only during bootstrap.
+ * @handle: DMA address handle for the command buffer space if @using_mob is
+ * false. Immutable.
+ * @size: The size of the command buffer space. Immutable.
+ */
+struct vmw_cmdbuf_man {
+	struct mutex cur_mutex;
+	struct mutex space_mutex;
+	struct work_struct work;
+	struct vmw_private *dev_priv;
+	struct vmw_cmdbuf_context ctx[SVGA_CB_CONTEXT_MAX];
+	struct list_head error;
+	struct drm_mm mm;
+	struct ttm_buffer_object *cmd_space;
+	struct ttm_bo_kmap_obj map_obj;
+	u8 *map;
+	struct vmw_cmdbuf_header *cur;
+	size_t cur_pos;
+	size_t default_size;
+	unsigned max_hw_submitted;
+	spinlock_t lock;
+	struct dma_pool *headers;
+	struct dma_pool *dheaders;
+	struct tasklet_struct tasklet;
+	wait_queue_head_t alloc_queue;
+	wait_queue_head_t idle_queue;
+	bool irq_on;
+	bool using_mob;
+	bool has_pool;
+	dma_addr_t handle;
+	size_t size;
+};
+
+/**
+ * struct vmw_cmdbuf_header - Command buffer metadata
+ *
+ * @man: The command buffer manager.
+ * @cb_header: Device command buffer header, allocated from a DMA pool.
+ * @cb_context: The device command buffer context.
+ * @list: List head for attaching to the manager lists.
+ * @node: The range manager node.
+ * @handle: The DMA address of @cb_header. Handed to the device on command
+ * buffer submission.
+ * @cmd: Pointer to the command buffer space of this buffer.
+ * @size: Size of the command buffer space of this buffer.
+ * @reserved: Reserved space of this buffer.
+ * @inline_space: Whether inline command buffer space is used.
+ */
+struct vmw_cmdbuf_header {
+	struct vmw_cmdbuf_man *man;
+	SVGACBHeader *cb_header;
+	SVGACBContext cb_context;
+	struct list_head list;
+	struct drm_mm_node *node;
+	dma_addr_t handle;
+	u8 *cmd;
+	size_t size;
+	size_t reserved;
+	bool inline_space;
+};
+
+/**
+ * struct vmw_cmdbuf_dheader - Device command buffer header with inline
+ * command buffer space.
+ *
+ * @cb_header: Device command buffer header.
+ * @cmd: Inline command buffer space.
+ */
+struct vmw_cmdbuf_dheader {
+	SVGACBHeader cb_header;
+	u8 cmd[VMW_CMDBUF_INLINE_SIZE] __aligned(VMW_CMDBUF_INLINE_ALIGN);
+};
+
+/**
+ * struct vmw_cmdbuf_alloc_info - Command buffer space allocation metadata
+ *
+ * @page_size: Size of requested command buffer space in pages.
+ * @node: The range manager node if allocation succeeded.
+ * @ret: Error code if failure. Otherwise 0.
+ */
+struct vmw_cmdbuf_alloc_info {
+	size_t page_size;
+	struct drm_mm_node *node;
+	int ret;
+};
+
+/* Loop over each context in the command buffer manager. */
+#define for_each_cmdbuf_ctx(_man, _i, _ctx) \
+	for (_i = 0, _ctx = &(_man)->ctx[0]; (_i) < SVGA_CB_CONTEXT_MAX; \
+	     ++(_i), ++(_ctx))
+
+static int vmw_cmdbuf_startstop(struct vmw_cmdbuf_man *man, bool enable);
+
+
+/**
+ * vmw_cmdbuf_cur_lock - Helper to lock the cur_mutex.
+ *
+ * @man: The command buffer manager.
+ * @interruptible: Whether to wait interruptible when locking.
+ */
+static int vmw_cmdbuf_cur_lock(struct vmw_cmdbuf_man *man, bool interruptible)
+{
+	if (interruptible) {
+		if (mutex_lock_interruptible(&man->cur_mutex))
+			return -ERESTARTSYS;
+	} else {
+		mutex_lock(&man->cur_mutex);
+	}
+
+	return 0;
+}
+
+/**
+ * vmw_cmdbuf_cur_unlock - Helper to unlock the cur_mutex.
+ *
+ * @man: The command buffer manager.
+ */
+static void vmw_cmdbuf_cur_unlock(struct vmw_cmdbuf_man *man)
+{
+	mutex_unlock(&man->cur_mutex);
+}
+
+/**
+ * vmw_cmdbuf_header_inline_free - Free a struct vmw_cmdbuf_header that has
+ * been used for the device context with inline command buffers.
+ * Need not be called locked.
+ *
+ * @header: Pointer to the header to free.
+ */
+static void vmw_cmdbuf_header_inline_free(struct vmw_cmdbuf_header *header)
+{
+	struct vmw_cmdbuf_dheader *dheader;
+
+	if (WARN_ON_ONCE(!header->inline_space))
+		return;
+
+	dheader = container_of(header->cb_header, struct vmw_cmdbuf_dheader,
+			       cb_header);
+	dma_pool_free(header->man->dheaders, dheader, header->handle);
+	kfree(header);
+}
+
+/**
+ * __vmw_cmdbuf_header_free - Free a struct vmw_cmdbuf_header and its
+ * associated structures.
+ *
+ * @header: Pointer to the header to free.
+ *
+ * For internal use. Must be called with @man->lock held.
+ */
+static void __vmw_cmdbuf_header_free(struct vmw_cmdbuf_header *header)
+{
+	struct vmw_cmdbuf_man *man = header->man;
+
+	BUG_ON(!spin_is_locked(&man->lock));
+
+	if (header->inline_space) {
+		vmw_cmdbuf_header_inline_free(header);
+		return;
+	}
+
+	drm_mm_remove_node(header->node);
+	kfree(header->node);
+	header->node = NULL;
+	wake_up_all(&man->alloc_queue);
+	if (header->cb_header)
+		dma_pool_free(man->headers, header->cb_header,
+			      header->handle);
+	kfree(header);
+}
+
+/**
+ * vmw_cmdbuf_header_free - Free a struct vmw_cmdbuf_header and its
+ * associated structures.
+ *
+ * @header: Pointer to the header to free.
+ */
+void vmw_cmdbuf_header_free(struct vmw_cmdbuf_header *header)
+{
+	struct vmw_cmdbuf_man *man = header->man;
+
+	/* Avoid locking if inline_space */
+	if (header->inline_space) {
+		vmw_cmdbuf_header_inline_free(header);
+		return;
+	}
+	spin_lock_bh(&man->lock);
+	__vmw_cmdbuf_header_free(header);
+	spin_unlock_bh(&man->lock);
+}
+
+
+/**
+ * vmw_cmdbuf_header_submit: Submit a command buffer to hardware.
+ *
+ * @header: The header of the buffer to submit.
+ */
+static int vmw_cmdbuf_header_submit(struct vmw_cmdbuf_header *header)
+{
+	struct vmw_cmdbuf_man *man = header->man;
+	u32 val;
+
+	val = (header->handle >> 32);
+	vmw_write(man->dev_priv, SVGA_REG_COMMAND_HIGH, val);
+	val = (header->handle & 0xFFFFFFFFULL);
+	val |= header->cb_context & SVGA_CB_CONTEXT_MASK;
+	vmw_write(man->dev_priv, SVGA_REG_COMMAND_LOW, val);
+
+	return header->cb_header->status;
+}
+
+/**
+ * vmw_cmdbuf_ctx_init: Initialize a command buffer context.
+ *
+ * @ctx: The command buffer context to initialize
+ */
+static void vmw_cmdbuf_ctx_init(struct vmw_cmdbuf_context *ctx)
+{
+	INIT_LIST_HEAD(&ctx->hw_submitted);
+	INIT_LIST_HEAD(&ctx->submitted);
+	INIT_LIST_HEAD(&ctx->preempted);
+	ctx->num_hw_submitted = 0;
+}
+
+/**
+ * vmw_cmdbuf_ctx_submit: Submit command buffers from a command buffer
+ * context.
+ *
+ * @man: The command buffer manager.
+ * @ctx: The command buffer context.
+ *
+ * Submits command buffers to hardware until there are no more command
+ * buffers to submit or the hardware can't handle more command buffers.
+ */
+static void vmw_cmdbuf_ctx_submit(struct vmw_cmdbuf_man *man,
+				  struct vmw_cmdbuf_context *ctx)
+{
+	while (ctx->num_hw_submitted < man->max_hw_submitted &&
+	       !list_empty(&ctx->submitted)) {
+		struct vmw_cmdbuf_header *entry;
+		SVGACBStatus status;
+
+		entry = list_first_entry(&ctx->submitted,
+					 struct vmw_cmdbuf_header,
+					 list);
+
+		status = vmw_cmdbuf_header_submit(entry);
+
+		/* This should never happen */
+		if (WARN_ON_ONCE(status == SVGA_CB_STATUS_QUEUE_FULL)) {
+			entry->cb_header->status = SVGA_CB_STATUS_NONE;
+			break;
+		}
+
+		list_del(&entry->list);
+		list_add_tail(&entry->list, &ctx->hw_submitted);
+		ctx->num_hw_submitted++;
+	}
+}
+
+/**
+ * vmw_cmdbuf_ctx_process: Process a command buffer context.
+ *
+ * @man: The command buffer manager.
+ * @ctx: The command buffer context.
+ * @notempty: Incremented if the context still holds command buffers that
+ * have not yet been submitted to hardware.
+ *
+ * Submit command buffers to hardware if possible, and process finished
+ * buffers. Typically freeing them, but on preemption or error take
+ * appropriate action. Wake up waiters if appropriate.
+ */
+static void vmw_cmdbuf_ctx_process(struct vmw_cmdbuf_man *man,
+				   struct vmw_cmdbuf_context *ctx,
+				   int *notempty)
+{
+	struct vmw_cmdbuf_header *entry, *next;
+
+	vmw_cmdbuf_ctx_submit(man, ctx);
+
+	list_for_each_entry_safe(entry, next, &ctx->hw_submitted, list) {
+		SVGACBStatus status = entry->cb_header->status;
+
+		if (status == SVGA_CB_STATUS_NONE)
+			break;
+
+		list_del(&entry->list);
+		wake_up_all(&man->idle_queue);
+		ctx->num_hw_submitted--;
+		switch (status) {
+		case SVGA_CB_STATUS_COMPLETED:
+			__vmw_cmdbuf_header_free(entry);
+			break;
+		case SVGA_CB_STATUS_COMMAND_ERROR:
+		case SVGA_CB_STATUS_CB_HEADER_ERROR:
+			list_add_tail(&entry->list, &man->error);
+			schedule_work(&man->work);
+			break;
+		case SVGA_CB_STATUS_PREEMPTED:
+			list_add(&entry->list, &ctx->preempted);
+			break;
+		default:
+			WARN_ONCE(true, "Undefined command buffer status.\n");
+			__vmw_cmdbuf_header_free(entry);
+			break;
+		}
+	}
+
+	vmw_cmdbuf_ctx_submit(man, ctx);
+	if (!list_empty(&ctx->submitted))
+		(*notempty)++;
+}
+
+/**
+ * vmw_cmdbuf_man_process - Process all command buffer contexts and
+ * switch on and off irqs as appropriate.
+ *
+ * @man: The command buffer manager.
+ *
+ * Calls vmw_cmdbuf_ctx_process() on all contexts. If any context has
+ * command buffers left that are not submitted to hardware, make sure
+ * IRQ handling is turned on. Otherwise, make sure it's turned off. This
+ * function may return -EAGAIN to indicate it should be rerun due to
+ * possibly missed IRQs if IRQs have just been turned on.
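+ *
+ * Callers therefore rerun the function once when it returns -EAGAIN, e.g.:
+ *
+ *	if (vmw_cmdbuf_man_process(man) == -EAGAIN)
+ *		(void) vmw_cmdbuf_man_process(man);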
+ */
+static int vmw_cmdbuf_man_process(struct vmw_cmdbuf_man *man)
+{
+	int notempty = 0;
+	struct vmw_cmdbuf_context *ctx;
+	int i;
+
+	for_each_cmdbuf_ctx(man, i, ctx)
+		vmw_cmdbuf_ctx_process(man, ctx, &notempty);
+
+	if (man->irq_on && !notempty) {
+		vmw_generic_waiter_remove(man->dev_priv,
+					  SVGA_IRQFLAG_COMMAND_BUFFER,
+					  &man->dev_priv->cmdbuf_waiters);
+		man->irq_on = false;
+	} else if (!man->irq_on && notempty) {
+		vmw_generic_waiter_add(man->dev_priv,
+				       SVGA_IRQFLAG_COMMAND_BUFFER,
+				       &man->dev_priv->cmdbuf_waiters);
+		man->irq_on = true;
+
+		/* Rerun in case we just missed an irq. */
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+/**
+ * vmw_cmdbuf_ctx_add - Schedule a command buffer for submission on a
+ * command buffer context
+ *
+ * @man: The command buffer manager.
+ * @header: The header of the buffer to submit.
+ * @cb_context: The command buffer context to use.
+ *
+ * This function adds @header to the "submitted" queue of the command
+ * buffer context identified by @cb_context. It then calls the command buffer
+ * manager processing to potentially submit the buffer to hardware.
+ * @man->lock needs to be held when calling this function.
+ */
+static void vmw_cmdbuf_ctx_add(struct vmw_cmdbuf_man *man,
+			       struct vmw_cmdbuf_header *header,
+			       SVGACBContext cb_context)
+{
+	if (!(header->cb_header->flags & SVGA_CB_FLAG_DX_CONTEXT))
+		header->cb_header->dxContext = 0;
+	header->cb_context = cb_context;
+	list_add_tail(&header->list, &man->ctx[cb_context].submitted);
+
+	if (vmw_cmdbuf_man_process(man) == -EAGAIN)
+		vmw_cmdbuf_man_process(man);
+}
+
+/**
+ * vmw_cmdbuf_man_tasklet - The main part of the command buffer interrupt
+ * handler implemented as a tasklet.
+ *
+ * @data: Tasklet closure. A pointer to the command buffer manager cast to
+ * an unsigned long.
+ *
+ * The bottom half (tasklet) of the interrupt handler simply calls into the
+ * command buffer processor to free finished buffers and submit any
+ * queued buffers to hardware.
+ */
+static void vmw_cmdbuf_man_tasklet(unsigned long data)
+{
+	struct vmw_cmdbuf_man *man = (struct vmw_cmdbuf_man *) data;
+
+	spin_lock(&man->lock);
+	if (vmw_cmdbuf_man_process(man) == -EAGAIN)
+		(void) vmw_cmdbuf_man_process(man);
+	spin_unlock(&man->lock);
+}
+
+/**
+ * vmw_cmdbuf_work_func - The deferred work function that handles
+ * command buffer errors.
+ *
+ * @work: The work func closure argument.
+ *
+ * Restarting the command buffer context after an error requires process
+ * context, so it is deferred to this work function.
+ */
+static void vmw_cmdbuf_work_func(struct work_struct *work)
+{
+	struct vmw_cmdbuf_man *man =
+		container_of(work, struct vmw_cmdbuf_man, work);
+	struct vmw_cmdbuf_header *entry, *next;
+	bool restart = false;
+
+	spin_lock_bh(&man->lock);
+	list_for_each_entry_safe(entry, next, &man->error, list) {
+		restart = true;
+		DRM_ERROR("Command buffer error.\n");
+
+		list_del(&entry->list);
+		__vmw_cmdbuf_header_free(entry);
+		wake_up_all(&man->idle_queue);
+	}
+	spin_unlock_bh(&man->lock);
+
+	if (restart && vmw_cmdbuf_startstop(man, true))
+		DRM_ERROR("Failed restarting command buffer context 0.\n");
+}
+
+/**
+ * vmw_cmdbuf_man_idle - Check whether the command buffer manager is idle.
+ *
+ * @man: The command buffer manager.
+ * @check_preempted: Check also the preempted queue for pending command buffers.
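+ *
+ * Returns true if the command buffer manager is idle, false otherwise.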
+ * + */ +static bool vmw_cmdbuf_man_idle(struct vmw_cmdbuf_man *man, + bool check_preempted) +{ + struct vmw_cmdbuf_context *ctx; + bool idle = false; + int i; + + spin_lock_bh(&man->lock); + vmw_cmdbuf_man_process(man); + for_each_cmdbuf_ctx(man, i, ctx) { + if (!list_empty(&ctx->submitted) || + !list_empty(&ctx->hw_submitted) || + (check_preempted && !list_empty(&ctx->preempted))) + goto out_unlock; + } + + idle = list_empty(&man->error); + +out_unlock: + spin_unlock_bh(&man->lock); + + return idle; +} + +/** + * __vmw_cmdbuf_cur_flush - Flush the current command buffer for small kernel + * command submissions + * + * @man: The command buffer manager. + * + * Flushes the current command buffer without allocating a new one. A new one + * is automatically allocated when needed. Call with @man->cur_mutex held. + */ +static void __vmw_cmdbuf_cur_flush(struct vmw_cmdbuf_man *man) +{ + struct vmw_cmdbuf_header *cur = man->cur; + + WARN_ON(!mutex_is_locked(&man->cur_mutex)); + + if (!cur) + return; + + spin_lock_bh(&man->lock); + if (man->cur_pos == 0) { + __vmw_cmdbuf_header_free(cur); + goto out_unlock; + } + + man->cur->cb_header->length = man->cur_pos; + vmw_cmdbuf_ctx_add(man, man->cur, SVGA_CB_CONTEXT_0); +out_unlock: + spin_unlock_bh(&man->lock); + man->cur = NULL; + man->cur_pos = 0; +} + +/** + * vmw_cmdbuf_cur_flush - Flush the current command buffer for small kernel + * command submissions + * + * @man: The command buffer manager. + * @interruptible: Whether to sleep interruptible when sleeping. + * + * Flushes the current command buffer without allocating a new one. A new one + * is automatically allocated when needed. + */ +int vmw_cmdbuf_cur_flush(struct vmw_cmdbuf_man *man, + bool interruptible) +{ + int ret = vmw_cmdbuf_cur_lock(man, interruptible); + + if (ret) + return ret; + + __vmw_cmdbuf_cur_flush(man); + vmw_cmdbuf_cur_unlock(man); + + return 0; +} + +/** + * vmw_cmdbuf_idle - Wait for command buffer manager idle. + * + * @man: The command buffer manager. + * @interruptible: Sleep interruptible while waiting. + * @timeout: Time out after this many ticks. + * + * Wait until the command buffer manager has processed all command buffers, + * or until a timeout occurs. If a timeout occurs, the function will return + * -EBUSY. + */ +int vmw_cmdbuf_idle(struct vmw_cmdbuf_man *man, bool interruptible, + unsigned long timeout) +{ + int ret; + + ret = vmw_cmdbuf_cur_flush(man, interruptible); + vmw_generic_waiter_add(man->dev_priv, + SVGA_IRQFLAG_COMMAND_BUFFER, + &man->dev_priv->cmdbuf_waiters); + + if (interruptible) { + ret = wait_event_interruptible_timeout + (man->idle_queue, vmw_cmdbuf_man_idle(man, true), + timeout); + } else { + ret = wait_event_timeout + (man->idle_queue, vmw_cmdbuf_man_idle(man, true), + timeout); + } + vmw_generic_waiter_remove(man->dev_priv, + SVGA_IRQFLAG_COMMAND_BUFFER, + &man->dev_priv->cmdbuf_waiters); + if (ret == 0) { + if (!vmw_cmdbuf_man_idle(man, true)) + ret = -EBUSY; + else + ret = 0; + } + if (ret > 0) + ret = 0; + + return ret; +} + +/** + * vmw_cmdbuf_try_alloc - Try to allocate buffer space from the main pool. + * + * @man: The command buffer manager. + * @info: Allocation info. Will hold the size on entry and allocated mm node + * on successful return. + * + * Try to allocate buffer space from the main pool. Returns true if succeeded. + * If a fatal error was hit, the error code is returned in @info->ret. 
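+ *
+ * This function is also used as the wait_event() predicate when sleeping
+ * for main pool space to become available, e.g.:
+ *
+ *	wait_event(man->alloc_queue, vmw_cmdbuf_try_alloc(man, &info));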
+ */ +static bool vmw_cmdbuf_try_alloc(struct vmw_cmdbuf_man *man, + struct vmw_cmdbuf_alloc_info *info) +{ + int ret; + + if (info->node) + return true; + + info->node = kzalloc(sizeof(*info->node), GFP_KERNEL); + if (!info->node) { + info->ret = -ENOMEM; + return true; + } + + spin_lock_bh(&man->lock); + ret = drm_mm_insert_node_generic(&man->mm, info->node, info->page_size, 0, 0, + DRM_MM_SEARCH_DEFAULT, + DRM_MM_CREATE_DEFAULT); + spin_unlock_bh(&man->lock); + if (ret) { + kfree(info->node); + info->node = NULL; + } + + return !!info->node; +} + +/** + * vmw_cmdbuf_alloc_space - Allocate buffer space from the main pool. + * + * @man: The command buffer manager. + * @size: The size of the allocation. + * @interruptible: Whether to sleep interruptible while waiting for space. + * + * This function allocates buffer space from the main pool, and if there is + * no space available ATM, it turns on IRQ handling and sleeps waiting for it to + * become available. + */ +static struct drm_mm_node *vmw_cmdbuf_alloc_space(struct vmw_cmdbuf_man *man, + size_t size, + bool interruptible) +{ + struct vmw_cmdbuf_alloc_info info; + + info.page_size = PAGE_ALIGN(size) >> PAGE_SHIFT; + info.node = NULL; + info.ret = 0; + + /* + * To prevent starvation of large requests, only one allocating call + * at a time waiting for space. + */ + if (interruptible) { + if (mutex_lock_interruptible(&man->space_mutex)) + return ERR_PTR(-ERESTARTSYS); + } else { + mutex_lock(&man->space_mutex); + } + + /* Try to allocate space without waiting. */ + (void) vmw_cmdbuf_try_alloc(man, &info); + if (info.ret && !info.node) { + mutex_unlock(&man->space_mutex); + return ERR_PTR(info.ret); + } + + if (info.node) { + mutex_unlock(&man->space_mutex); + return info.node; + } + + vmw_generic_waiter_add(man->dev_priv, + SVGA_IRQFLAG_COMMAND_BUFFER, + &man->dev_priv->cmdbuf_waiters); + + if (interruptible) { + int ret; + + ret = wait_event_interruptible + (man->alloc_queue, vmw_cmdbuf_try_alloc(man, &info)); + if (ret) { + vmw_generic_waiter_remove + (man->dev_priv, SVGA_IRQFLAG_COMMAND_BUFFER, + &man->dev_priv->cmdbuf_waiters); + mutex_unlock(&man->space_mutex); + return ERR_PTR(ret); + } + } else { + wait_event(man->alloc_queue, vmw_cmdbuf_try_alloc(man, &info)); + } + vmw_generic_waiter_remove(man->dev_priv, + SVGA_IRQFLAG_COMMAND_BUFFER, + &man->dev_priv->cmdbuf_waiters); + mutex_unlock(&man->space_mutex); + if (info.ret && !info.node) + return ERR_PTR(info.ret); + + return info.node; +} + +/** + * vmw_cmdbuf_space_pool - Set up a command buffer header with command buffer + * space from the main pool. + * + * @man: The command buffer manager. + * @header: Pointer to the header to set up. + * @size: The requested size of the buffer space. + * @interruptible: Whether to sleep interruptible while waiting for space. 
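+ *
+ * Returns 0 on success. Negative error code on failure.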
+ */ +static int vmw_cmdbuf_space_pool(struct vmw_cmdbuf_man *man, + struct vmw_cmdbuf_header *header, + size_t size, + bool interruptible) +{ + SVGACBHeader *cb_hdr; + size_t offset; + int ret; + + if (!man->has_pool) + return -ENOMEM; + + header->node = vmw_cmdbuf_alloc_space(man, size, interruptible); + + if (IS_ERR(header->node)) + return PTR_ERR(header->node); + + header->cb_header = dma_pool_alloc(man->headers, GFP_KERNEL, + &header->handle); + if (!header->cb_header) { + ret = -ENOMEM; + goto out_no_cb_header; + } + + header->size = header->node->size << PAGE_SHIFT; + cb_hdr = header->cb_header; + offset = header->node->start << PAGE_SHIFT; + header->cmd = man->map + offset; + memset(cb_hdr, 0, sizeof(*cb_hdr)); + if (man->using_mob) { + cb_hdr->flags = SVGA_CB_FLAG_MOB; + cb_hdr->ptr.mob.mobid = man->cmd_space->mem.start; + cb_hdr->ptr.mob.mobOffset = offset; + } else { + cb_hdr->ptr.pa = (u64)man->handle + (u64)offset; + } + + return 0; + +out_no_cb_header: + spin_lock_bh(&man->lock); + drm_mm_remove_node(header->node); + spin_unlock_bh(&man->lock); + kfree(header->node); + + return ret; +} + +/** + * vmw_cmdbuf_space_inline - Set up a command buffer header with + * inline command buffer space. + * + * @man: The command buffer manager. + * @header: Pointer to the header to set up. + * @size: The requested size of the buffer space. + */ +static int vmw_cmdbuf_space_inline(struct vmw_cmdbuf_man *man, + struct vmw_cmdbuf_header *header, + int size) +{ + struct vmw_cmdbuf_dheader *dheader; + SVGACBHeader *cb_hdr; + + if (WARN_ON_ONCE(size > VMW_CMDBUF_INLINE_SIZE)) + return -ENOMEM; + + dheader = dma_pool_alloc(man->dheaders, GFP_KERNEL, + &header->handle); + if (!dheader) + return -ENOMEM; + + header->inline_space = true; + header->size = VMW_CMDBUF_INLINE_SIZE; + cb_hdr = &dheader->cb_header; + header->cb_header = cb_hdr; + header->cmd = dheader->cmd; + memset(dheader, 0, sizeof(*dheader)); + cb_hdr->status = SVGA_CB_STATUS_NONE; + cb_hdr->flags = SVGA_CB_FLAG_NONE; + cb_hdr->ptr.pa = (u64)header->handle + + (u64)offsetof(struct vmw_cmdbuf_dheader, cmd); + + return 0; +} + +/** + * vmw_cmdbuf_alloc - Allocate a command buffer header complete with + * command buffer space. + * + * @man: The command buffer manager. + * @size: The requested size of the buffer space. + * @interruptible: Whether to sleep interruptible while waiting for space. + * @p_header: points to a header pointer to populate on successful return. + * + * Returns a pointer to command buffer space if successful. Otherwise + * returns an error pointer. The header pointer returned in @p_header should + * be used for upcoming calls to vmw_cmdbuf_reserve() and vmw_cmdbuf_commit(). + */ +void *vmw_cmdbuf_alloc(struct vmw_cmdbuf_man *man, + size_t size, bool interruptible, + struct vmw_cmdbuf_header **p_header) +{ + struct vmw_cmdbuf_header *header; + int ret = 0; + + *p_header = NULL; + + header = kzalloc(sizeof(*header), GFP_KERNEL); + if (!header) + return ERR_PTR(-ENOMEM); + + if (size <= VMW_CMDBUF_INLINE_SIZE) + ret = vmw_cmdbuf_space_inline(man, header, size); + else + ret = vmw_cmdbuf_space_pool(man, header, size, interruptible); + + if (ret) { + kfree(header); + return ERR_PTR(ret); + } + + header->man = man; + INIT_LIST_HEAD(&header->list); + header->cb_header->status = SVGA_CB_STATUS_NONE; + *p_header = header; + + return header->cmd; +} + +/** + * vmw_cmdbuf_reserve_cur - Reserve space for commands in the current + * command buffer. + * + * @man: The command buffer manager. 
+ * @size: The requested size of the commands.
+ * @ctx_id: The context id if any. Otherwise set to SVGA3D_INVALID_ID.
+ * @interruptible: Whether to sleep interruptible while waiting for space.
+ *
+ * Returns a pointer to command buffer space if successful. Otherwise
+ * returns an error pointer.
+ */
+static void *vmw_cmdbuf_reserve_cur(struct vmw_cmdbuf_man *man,
+				    size_t size,
+				    int ctx_id,
+				    bool interruptible)
+{
+	struct vmw_cmdbuf_header *cur;
+	void *ret;
+
+	if (vmw_cmdbuf_cur_lock(man, interruptible))
+		return ERR_PTR(-ERESTARTSYS);
+
+	cur = man->cur;
+	if (cur && (size + man->cur_pos > cur->size ||
+		    (ctx_id != SVGA3D_INVALID_ID &&
+		     (cur->cb_header->flags & SVGA_CB_FLAG_DX_CONTEXT) &&
+		     ctx_id != cur->cb_header->dxContext)))
+		__vmw_cmdbuf_cur_flush(man);
+
+	if (!man->cur) {
+		ret = vmw_cmdbuf_alloc(man,
+				       max_t(size_t, size, man->default_size),
+				       interruptible, &man->cur);
+		if (IS_ERR(ret)) {
+			vmw_cmdbuf_cur_unlock(man);
+			return ret;
+		}
+
+		cur = man->cur;
+	}
+
+	if (ctx_id != SVGA3D_INVALID_ID) {
+		cur->cb_header->flags |= SVGA_CB_FLAG_DX_CONTEXT;
+		cur->cb_header->dxContext = ctx_id;
+	}
+
+	cur->reserved = size;
+
+	return (void *) (man->cur->cmd + man->cur_pos);
+}
+
+/**
+ * vmw_cmdbuf_commit_cur - Commit commands in the current command buffer.
+ *
+ * @man: The command buffer manager.
+ * @size: The size of the commands actually written.
+ * @flush: Whether to flush the command buffer immediately.
+ */
+static void vmw_cmdbuf_commit_cur(struct vmw_cmdbuf_man *man,
+				  size_t size, bool flush)
+{
+	struct vmw_cmdbuf_header *cur = man->cur;
+
+	WARN_ON(!mutex_is_locked(&man->cur_mutex));
+
+	WARN_ON(size > cur->reserved);
+	man->cur_pos += size;
+	if (!size)
+		cur->cb_header->flags &= ~SVGA_CB_FLAG_DX_CONTEXT;
+	if (flush)
+		__vmw_cmdbuf_cur_flush(man);
+	vmw_cmdbuf_cur_unlock(man);
+}
+
+/**
+ * vmw_cmdbuf_reserve - Reserve space for commands in a command buffer.
+ *
+ * @man: The command buffer manager.
+ * @size: The requested size of the commands.
+ * @ctx_id: The context id if any. Otherwise set to SVGA3D_INVALID_ID.
+ * @interruptible: Whether to sleep interruptible while waiting for space.
+ * @header: Header of the command buffer. NULL if the current command buffer
+ * should be used.
+ *
+ * Returns a pointer to command buffer space if successful. Otherwise
+ * returns an error pointer.
+ */
+void *vmw_cmdbuf_reserve(struct vmw_cmdbuf_man *man, size_t size,
+			 int ctx_id, bool interruptible,
+			 struct vmw_cmdbuf_header *header)
+{
+	if (!header)
+		return vmw_cmdbuf_reserve_cur(man, size, ctx_id, interruptible);
+
+	if (size > header->size)
+		return ERR_PTR(-EINVAL);
+
+	if (ctx_id != SVGA3D_INVALID_ID) {
+		header->cb_header->flags |= SVGA_CB_FLAG_DX_CONTEXT;
+		header->cb_header->dxContext = ctx_id;
+	}
+
+	header->reserved = size;
+	return header->cmd;
+}
+
+/**
+ * vmw_cmdbuf_commit - Commit commands in a command buffer.
+ *
+ * @man: The command buffer manager.
+ * @size: The size of the commands actually written.
+ * @header: Header of the command buffer. NULL if the current command buffer
+ * should be used.
+ * @flush: Whether to flush the command buffer immediately.
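+ *
+ * Reserve and commit are used as a pair. For a small kernel submission
+ * through the current command buffer, the expected sequence is e.g.:
+ *
+ *	cmd = vmw_cmdbuf_reserve(man, size, SVGA3D_INVALID_ID, false, NULL);
+ *	if (!IS_ERR(cmd)) {
+ *		... write @size bytes of commands to cmd ...
+ *		vmw_cmdbuf_commit(man, size, NULL, false);
+ *	}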
+ */ +void vmw_cmdbuf_commit(struct vmw_cmdbuf_man *man, size_t size, + struct vmw_cmdbuf_header *header, bool flush) +{ + if (!header) { + vmw_cmdbuf_commit_cur(man, size, flush); + return; + } + + (void) vmw_cmdbuf_cur_lock(man, false); + __vmw_cmdbuf_cur_flush(man); + WARN_ON(size > header->reserved); + man->cur = header; + man->cur_pos = size; + if (!size) + header->cb_header->flags &= ~SVGA_CB_FLAG_DX_CONTEXT; + if (flush) + __vmw_cmdbuf_cur_flush(man); + vmw_cmdbuf_cur_unlock(man); +} + +/** + * vmw_cmdbuf_tasklet_schedule - Schedule the interrupt handler bottom half. + * + * @man: The command buffer manager. + */ +void vmw_cmdbuf_tasklet_schedule(struct vmw_cmdbuf_man *man) +{ + if (!man) + return; + + tasklet_schedule(&man->tasklet); +} + +/** + * vmw_cmdbuf_send_device_command - Send a command through the device context. + * + * @man: The command buffer manager. + * @command: Pointer to the command to send. + * @size: Size of the command. + * + * Synchronously sends a device context command. + */ +static int vmw_cmdbuf_send_device_command(struct vmw_cmdbuf_man *man, + const void *command, + size_t size) +{ + struct vmw_cmdbuf_header *header; + int status; + void *cmd = vmw_cmdbuf_alloc(man, size, false, &header); + + if (IS_ERR(cmd)) + return PTR_ERR(cmd); + + memcpy(cmd, command, size); + header->cb_header->length = size; + header->cb_context = SVGA_CB_CONTEXT_DEVICE; + spin_lock_bh(&man->lock); + status = vmw_cmdbuf_header_submit(header); + spin_unlock_bh(&man->lock); + vmw_cmdbuf_header_free(header); + + if (status != SVGA_CB_STATUS_COMPLETED) { + DRM_ERROR("Device context command failed with status %d\n", + status); + return -EINVAL; + } + + return 0; +} + +/** + * vmw_cmdbuf_startstop - Send a start / stop command through the device + * context. + * + * @man: The command buffer manager. + * @enable: Whether to enable or disable the context. + * + * Synchronously sends a device start / stop context command. + */ +static int vmw_cmdbuf_startstop(struct vmw_cmdbuf_man *man, + bool enable) +{ + struct { + uint32 id; + SVGADCCmdStartStop body; + } __packed cmd; + + cmd.id = SVGA_DC_CMD_START_STOP_CONTEXT; + cmd.body.enable = (enable) ? 1 : 0; + cmd.body.context = SVGA_CB_CONTEXT_0; + + return vmw_cmdbuf_send_device_command(man, &cmd, sizeof(cmd)); +} + +/** + * vmw_cmdbuf_set_pool_size - Set command buffer manager sizes + * + * @man: The command buffer manager. + * @size: The size of the main space pool. + * @default_size: The default size of the command buffer for small kernel + * submissions. + * + * Set the size and allocate the main command buffer space pool, + * as well as the default size of the command buffer for + * small kernel submissions. If successful, this enables large command + * submissions. Note that this function requires that rudimentary command + * submission is already available and that the MOB memory manager is alive. + * Returns 0 on success. Negative error code on failure. + */ +int vmw_cmdbuf_set_pool_size(struct vmw_cmdbuf_man *man, + size_t size, size_t default_size) +{ + struct vmw_private *dev_priv = man->dev_priv; + bool dummy; + int ret; + + if (man->has_pool) + return -EINVAL; + + /* First, try to allocate a huge chunk of DMA memory */ + size = PAGE_ALIGN(size); + man->map = dma_alloc_coherent(&dev_priv->dev->pdev->dev, size, + &man->handle, GFP_KERNEL); + if (man->map) { + man->using_mob = false; + } else { + /* + * DMA memory failed. If we can have command buffers in a + * MOB, try to use that instead. 
Note that this will
+		 * actually call into the already enabled manager, when
+		 * binding the MOB.
+		 */
+		if (!(dev_priv->capabilities & SVGA_CAP_CMD_BUFFERS_3))
+			return -ENOMEM;
+
+		ret = ttm_bo_create(&dev_priv->bdev, size, ttm_bo_type_device,
+				    &vmw_mob_ne_placement, 0, false, NULL,
+				    &man->cmd_space);
+		if (ret)
+			return ret;
+
+		man->using_mob = true;
+		ret = ttm_bo_kmap(man->cmd_space, 0, size >> PAGE_SHIFT,
+				  &man->map_obj);
+		if (ret)
+			goto out_no_map;
+
+		man->map = ttm_kmap_obj_virtual(&man->map_obj, &dummy);
+	}
+
+	man->size = size;
+	drm_mm_init(&man->mm, 0, size >> PAGE_SHIFT);
+
+	man->has_pool = true;
+	man->default_size = default_size;
+	DRM_INFO("Using command buffers with %s pool.\n",
+		 (man->using_mob) ? "MOB" : "DMA");
+
+	return 0;
+
+out_no_map:
+	if (man->using_mob)
+		ttm_bo_unref(&man->cmd_space);
+
+	return ret;
+}
+
+/**
+ * vmw_cmdbuf_man_create: Create a command buffer manager and enable it for
+ * inline command buffer submissions only.
+ *
+ * @dev_priv: Pointer to device private structure.
+ *
+ * Returns a pointer to a command buffer manager on success, or an error
+ * pointer on failure. The command buffer manager will be enabled for
+ * submissions of size VMW_CMDBUF_INLINE_SIZE only.
+ */
+struct vmw_cmdbuf_man *vmw_cmdbuf_man_create(struct vmw_private *dev_priv)
+{
+	struct vmw_cmdbuf_man *man;
+	struct vmw_cmdbuf_context *ctx;
+	int i;
+	int ret;
+
+	if (!(dev_priv->capabilities & SVGA_CAP_COMMAND_BUFFERS))
+		return ERR_PTR(-ENOSYS);
+
+	man = kzalloc(sizeof(*man), GFP_KERNEL);
+	if (!man)
+		return ERR_PTR(-ENOMEM);
+
+	man->headers = dma_pool_create("vmwgfx cmdbuf",
+				       &dev_priv->dev->pdev->dev,
+				       sizeof(SVGACBHeader),
+				       64, PAGE_SIZE);
+	if (!man->headers) {
+		ret = -ENOMEM;
+		goto out_no_pool;
+	}
+
+	man->dheaders = dma_pool_create("vmwgfx inline cmdbuf",
+					&dev_priv->dev->pdev->dev,
+					sizeof(struct vmw_cmdbuf_dheader),
+					64, PAGE_SIZE);
+	if (!man->dheaders) {
+		ret = -ENOMEM;
+		goto out_no_dpool;
+	}
+
+	for_each_cmdbuf_ctx(man, i, ctx)
+		vmw_cmdbuf_ctx_init(ctx);
+
+	INIT_LIST_HEAD(&man->error);
+	spin_lock_init(&man->lock);
+	mutex_init(&man->cur_mutex);
+	mutex_init(&man->space_mutex);
+	tasklet_init(&man->tasklet, vmw_cmdbuf_man_tasklet,
+		     (unsigned long) man);
+	man->default_size = VMW_CMDBUF_INLINE_SIZE;
+	init_waitqueue_head(&man->alloc_queue);
+	init_waitqueue_head(&man->idle_queue);
+	man->dev_priv = dev_priv;
+	man->max_hw_submitted = SVGA_CB_MAX_QUEUED_PER_CONTEXT - 1;
+	INIT_WORK(&man->work, &vmw_cmdbuf_work_func);
+	vmw_generic_waiter_add(dev_priv, SVGA_IRQFLAG_ERROR,
+			       &dev_priv->error_waiters);
+	ret = vmw_cmdbuf_startstop(man, true);
+	if (ret) {
+		DRM_ERROR("Failed starting command buffer context 0.\n");
+		vmw_cmdbuf_man_destroy(man);
+		return ERR_PTR(ret);
+	}
+
+	return man;
+
+out_no_dpool:
+	dma_pool_destroy(man->headers);
+out_no_pool:
+	kfree(man);
+
+	return ERR_PTR(ret);
+}
+
+/**
+ * vmw_cmdbuf_remove_pool - Take down the main buffer space pool.
+ *
+ * @man: Pointer to a command buffer manager.
+ *
+ * This function removes the main buffer space pool, and should be called
+ * before MOB memory management is removed. When this function has been called,
+ * only small command buffer submissions of size VMW_CMDBUF_INLINE_SIZE or
+ * less are allowed, and the default size of the command buffer for small kernel
+ * submissions is also set to this size.
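+ *
+ * The expected takedown order is thus vmw_cmdbuf_remove_pool() before MOB
+ * memory management is taken down, and vmw_cmdbuf_man_destroy() only after
+ * that, mirroring the vmw_release_device_early() / vmw_release_device_late()
+ * split.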
+ */ +void vmw_cmdbuf_remove_pool(struct vmw_cmdbuf_man *man) +{ + if (!man->has_pool) + return; + + man->has_pool = false; + man->default_size = VMW_CMDBUF_INLINE_SIZE; + (void) vmw_cmdbuf_idle(man, false, 10*HZ); + if (man->using_mob) { + (void) ttm_bo_kunmap(&man->map_obj); + ttm_bo_unref(&man->cmd_space); + } else { + dma_free_coherent(&man->dev_priv->dev->pdev->dev, + man->size, man->map, man->handle); + } +} + +/** + * vmw_cmdbuf_man_destroy - Take down a command buffer manager. + * + * @man: Pointer to a command buffer manager. + * + * This function idles and then destroys a command buffer manager. + */ +void vmw_cmdbuf_man_destroy(struct vmw_cmdbuf_man *man) +{ + WARN_ON_ONCE(man->has_pool); + (void) vmw_cmdbuf_idle(man, false, 10*HZ); + if (vmw_cmdbuf_startstop(man, false)) + DRM_ERROR("Failed stopping command buffer context 0.\n"); + + vmw_generic_waiter_remove(man->dev_priv, SVGA_IRQFLAG_ERROR, + &man->dev_priv->error_waiters); + tasklet_kill(&man->tasklet); + (void) cancel_work_sync(&man->work); + dma_pool_destroy(man->dheaders); + dma_pool_destroy(man->headers); + mutex_destroy(&man->cur_mutex); + mutex_destroy(&man->space_mutex); + kfree(man); +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index a4766acd0ea2..7e2b3c84119b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -278,6 +278,8 @@ static void vmw_print_capabilities(uint32_t capabilities) DRM_INFO(" Command Buffers 2.\n"); if (capabilities & SVGA_CAP_GBOBJECTS) DRM_INFO(" Guest Backed Resources.\n"); + if (capabilities & SVGA_CAP_CMD_BUFFERS_3) + DRM_INFO(" Command Buffers 3.\n"); } /** @@ -362,6 +364,17 @@ static int vmw_request_device_late(struct vmw_private *dev_priv) } } + if (dev_priv->cman) { + ret = vmw_cmdbuf_set_pool_size(dev_priv->cman, + 256*4096, 2*4096); + if (ret) { + struct vmw_cmdbuf_man *man = dev_priv->cman; + + dev_priv->cman = NULL; + vmw_cmdbuf_man_destroy(man); + } + } + return 0; } @@ -375,6 +388,9 @@ static int vmw_request_device(struct vmw_private *dev_priv) return ret; } vmw_fence_fifo_up(dev_priv->fman); + dev_priv->cman = vmw_cmdbuf_man_create(dev_priv); + if (IS_ERR(dev_priv->cman)) + dev_priv->cman = NULL; ret = vmw_request_device_late(dev_priv); if (ret) @@ -387,10 +403,14 @@ static int vmw_request_device(struct vmw_private *dev_priv) return 0; out_no_query_bo: + if (dev_priv->cman) + vmw_cmdbuf_remove_pool(dev_priv->cman); if (dev_priv->has_mob) { (void) ttm_bo_evict_mm(&dev_priv->bdev, VMW_PL_MOB); vmw_otables_takedown(dev_priv); } + if (dev_priv->cman) + vmw_cmdbuf_man_destroy(dev_priv->cman); out_no_mob: vmw_fence_fifo_down(dev_priv->fman); vmw_fifo_release(dev_priv, &dev_priv->fifo); @@ -415,6 +435,9 @@ static void vmw_release_device_early(struct vmw_private *dev_priv) BUG_ON(dev_priv->pinned_bo != NULL); ttm_bo_unref(&dev_priv->dummy_query_bo); + if (dev_priv->cman) + vmw_cmdbuf_remove_pool(dev_priv->cman); + if (dev_priv->has_mob) { ttm_bo_evict_mm(&dev_priv->bdev, VMW_PL_MOB); vmw_otables_takedown(dev_priv); @@ -432,6 +455,9 @@ static void vmw_release_device_early(struct vmw_private *dev_priv) static void vmw_release_device_late(struct vmw_private *dev_priv) { vmw_fence_fifo_down(dev_priv->fman); + if (dev_priv->cman) + vmw_cmdbuf_man_destroy(dev_priv->cman); + vmw_fifo_release(dev_priv, &dev_priv->fifo); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index a5f221eaf076..8fd40c6bad06 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -453,6 +453,8 @@ struct vmw_private {
 	spinlock_t waiter_lock;
 	int fence_queue_waiters; /* Protected by waiter_lock */
 	int goal_queue_waiters; /* Protected by waiter_lock */
+	int cmdbuf_waiters; /* Protected by irq_lock */
+	int error_waiters; /* Protected by irq_lock */
 	atomic_t fifo_queue_waiters;
 	uint32_t last_read_seqno;
 	spinlock_t irq_lock;
@@ -535,6 +537,8 @@ struct vmw_private {
 	 */
 	struct ttm_buffer_object *otable_bo;
 	struct vmw_otable *otables;
+
+	struct vmw_cmdbuf_man *cman;
 };
 
 static inline struct vmw_surface *vmw_res_to_srf(struct vmw_resource *res)
@@ -729,6 +733,8 @@ extern bool vmw_fifo_have_3d(struct vmw_private *dev_priv);
 extern bool vmw_fifo_have_pitchlock(struct vmw_private *dev_priv);
 extern int vmw_fifo_emit_dummy_query(struct vmw_private *dev_priv,
 				     uint32_t cid);
+extern int vmw_fifo_flush(struct vmw_private *dev_priv,
+			  bool interruptible);
 
 /**
  * TTM glue - vmwgfx_ttm_glue.c
@@ -753,6 +759,7 @@ extern struct ttm_placement vmw_sys_ne_placement;
 extern struct ttm_placement vmw_evictable_placement;
 extern struct ttm_placement vmw_srf_placement;
 extern struct ttm_placement vmw_mob_placement;
+extern struct ttm_placement vmw_mob_ne_placement;
 extern struct ttm_bo_driver vmw_bo_driver;
 extern int vmw_dma_quiescent(struct drm_device *dev);
 extern int vmw_bo_map_dma(struct ttm_buffer_object *bo);
@@ -855,6 +862,10 @@ extern void vmw_seqno_waiter_add(struct vmw_private *dev_priv);
 extern void vmw_seqno_waiter_remove(struct vmw_private *dev_priv);
 extern void vmw_goal_waiter_add(struct vmw_private *dev_priv);
 extern void vmw_goal_waiter_remove(struct vmw_private *dev_priv);
+extern void vmw_generic_waiter_add(struct vmw_private *dev_priv, u32 flag,
+				   int *waiter_count);
+extern void vmw_generic_waiter_remove(struct vmw_private *dev_priv,
+				      u32 flag, int *waiter_count);
 
 /**
  * Rudimentary fence-like objects currently used only for throttling -
@@ -1077,6 +1088,35 @@ extern int vmw_cmdbuf_res_remove(struct vmw_cmdbuf_res_manager *man,
 				 struct list_head *list);
 
+/*
+ * Command buffer management vmwgfx_cmdbuf.c
+ */
+struct vmw_cmdbuf_man;
+struct vmw_cmdbuf_header;
+
+extern struct vmw_cmdbuf_man *
+vmw_cmdbuf_man_create(struct vmw_private *dev_priv);
+extern int vmw_cmdbuf_set_pool_size(struct vmw_cmdbuf_man *man,
+				    size_t size, size_t default_size);
+extern void vmw_cmdbuf_remove_pool(struct vmw_cmdbuf_man *man);
+extern void vmw_cmdbuf_man_destroy(struct vmw_cmdbuf_man *man);
+extern int vmw_cmdbuf_idle(struct vmw_cmdbuf_man *man, bool interruptible,
+			   unsigned long timeout);
+extern void *vmw_cmdbuf_reserve(struct vmw_cmdbuf_man *man, size_t size,
+				int ctx_id, bool interruptible,
+				struct vmw_cmdbuf_header *header);
+extern void vmw_cmdbuf_commit(struct vmw_cmdbuf_man *man, size_t size,
+			      struct vmw_cmdbuf_header *header,
+			      bool flush);
+extern void vmw_cmdbuf_tasklet_schedule(struct vmw_cmdbuf_man *man);
+extern void *vmw_cmdbuf_alloc(struct vmw_cmdbuf_man *man,
+			      size_t size, bool interruptible,
+			      struct vmw_cmdbuf_header **p_header);
+extern void vmw_cmdbuf_header_free(struct vmw_cmdbuf_header *header);
+extern int vmw_cmdbuf_cur_flush(struct vmw_cmdbuf_man *man,
+				bool interruptible);
+
+
 /**
  * Inline helper functions
  */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 654c8daeb5ab..0792d8d59315 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -2417,7 +2417,126 @@ vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
 	}
 }
 
+/**
+ * vmw_execbuf_submit_fifo - Patch a command batch and submit it using
+ * the fifo.
+ *
+ * @dev_priv: Pointer to a device private structure.
+ * @kernel_commands: Pointer to the unpatched command batch.
+ * @command_size: Size of the unpatched command batch.
+ * @sw_context: Structure holding the relocation lists.
+ *
+ * Side effects: If this function returns 0, then the command batch
+ * pointed to by @kernel_commands will have been modified.
+ */
+static int vmw_execbuf_submit_fifo(struct vmw_private *dev_priv,
+				   void *kernel_commands,
+				   u32 command_size,
+				   struct vmw_sw_context *sw_context)
+{
+	void *cmd = vmw_fifo_reserve(dev_priv, command_size);
+
+	if (!cmd) {
+		DRM_ERROR("Failed reserving fifo space for commands.\n");
+		return -ENOMEM;
+	}
+
+	vmw_apply_relocations(sw_context);
+	memcpy(cmd, kernel_commands, command_size);
+	vmw_resource_relocations_apply(cmd, &sw_context->res_relocations);
+	vmw_resource_relocations_free(&sw_context->res_relocations);
+	vmw_fifo_commit(dev_priv, command_size);
+
+	return 0;
+}
+
+/**
+ * vmw_execbuf_submit_cmdbuf - Patch a command batch and submit it using
+ * the command buffer manager.
+ *
+ * @dev_priv: Pointer to a device private structure.
+ * @header: Opaque handle to the command buffer allocation.
+ * @command_size: Size of the unpatched command batch.
+ * @sw_context: Structure holding the relocation lists.
+ *
+ * Side effects: If this function returns 0, then the command buffer
+ * represented by @header will have been modified.
+ */
+static int vmw_execbuf_submit_cmdbuf(struct vmw_private *dev_priv,
+				     struct vmw_cmdbuf_header *header,
+				     u32 command_size,
+				     struct vmw_sw_context *sw_context)
+{
+	void *cmd = vmw_cmdbuf_reserve(dev_priv->cman, command_size,
+				       SVGA3D_INVALID_ID, false, header);
+
+	vmw_apply_relocations(sw_context);
+	vmw_resource_relocations_apply(cmd, &sw_context->res_relocations);
+	vmw_resource_relocations_free(&sw_context->res_relocations);
+	vmw_cmdbuf_commit(dev_priv->cman, command_size, header, false);
+
+	return 0;
+}
+
+/**
+ * vmw_execbuf_cmdbuf - Prepare, if possible, a user-space command batch for
+ * submission using a command buffer.
+ *
+ * @dev_priv: Pointer to a device private structure.
+ * @user_commands: User-space pointer to the commands to be submitted.
+ * @kernel_commands: Pointer to a kernel-space copy of the commands, or NULL.
+ * @command_size: Size of the unpatched command batch.
+ * @header: Out parameter returning the opaque pointer to the command buffer.
+ *
+ * This function checks whether we can use the command buffer manager for
+ * submission and if so, creates a command buffer of suitable size and
+ * copies the user data into that buffer.
+ *
+ * On successful return, the function returns a pointer to the data in the
+ * command buffer and *@header is set to non-NULL.
+ * If command buffers could not be used, the function will return the value
+ * of @kernel_commands on function call. That value may be NULL. In that case,
+ * the value of *@header will be set to NULL.
+ * If an error is encountered, the function will return a pointer error value.
+ * If the function is interrupted by a signal while sleeping, it will return
+ * -ERESTARTSYS cast to a pointer error value.
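+ *
+ * The expected calling pattern is thus:
+ *
+ *	kernel_commands = vmw_execbuf_cmdbuf(dev_priv, user_commands,
+ *					     kernel_commands, command_size,
+ *					     &header);
+ *	if (IS_ERR(kernel_commands))
+ *		return PTR_ERR(kernel_commands);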
+ */ +void *vmw_execbuf_cmdbuf(struct vmw_private *dev_priv, + void __user *user_commands, + void *kernel_commands, + u32 command_size, + struct vmw_cmdbuf_header **header) +{ + size_t cmdbuf_size; + int ret; + + *header = NULL; + if (!dev_priv->cman || kernel_commands) + return kernel_commands; + + if (command_size > SVGA_CB_MAX_SIZE) { + DRM_ERROR("Command buffer is too large.\n"); + return ERR_PTR(-EINVAL); + } + + /* If possible, add a little space for fencing. */ + cmdbuf_size = command_size + 512; + cmdbuf_size = min_t(size_t, cmdbuf_size, SVGA_CB_MAX_SIZE); + kernel_commands = vmw_cmdbuf_alloc(dev_priv->cman, cmdbuf_size, + true, header); + if (IS_ERR(kernel_commands)) + return kernel_commands; + + ret = copy_from_user(kernel_commands, user_commands, + command_size); + if (ret) { + DRM_ERROR("Failed copying commands.\n"); + vmw_cmdbuf_header_free(*header); + *header = NULL; + return ERR_PTR(-EFAULT); + } + + return kernel_commands; +} int vmw_execbuf_process(struct drm_file *file_priv, struct vmw_private *dev_priv, @@ -2432,18 +2551,33 @@ int vmw_execbuf_process(struct drm_file *file_priv, struct vmw_fence_obj *fence = NULL; struct vmw_resource *error_resource; struct list_head resource_list; + struct vmw_cmdbuf_header *header; struct ww_acquire_ctx ticket; uint32_t handle; - void *cmd; int ret; + if (throttle_us) { + ret = vmw_wait_lag(dev_priv, &dev_priv->fifo.marker_queue, + throttle_us); + + if (ret) + return ret; + } + + kernel_commands = vmw_execbuf_cmdbuf(dev_priv, user_commands, + kernel_commands, command_size, + &header); + if (IS_ERR(kernel_commands)) + return PTR_ERR(kernel_commands); + ret = mutex_lock_interruptible(&dev_priv->cmdbuf_mutex); - if (unlikely(ret != 0)) - return -ERESTARTSYS; + if (ret) { + ret = -ERESTARTSYS; + goto out_free_header; + } + sw_context->kernel = false; if (kernel_commands == NULL) { - sw_context->kernel = false; - ret = vmw_resize_cmd_bounce(sw_context, command_size); if (unlikely(ret != 0)) goto out_unlock; @@ -2458,7 +2592,7 @@ int vmw_execbuf_process(struct drm_file *file_priv, goto out_unlock; } kernel_commands = sw_context->cmd_bounce; - } else + } else if (!header) sw_context->kernel = true; sw_context->fp = vmw_fpriv(file_priv); @@ -2478,7 +2612,6 @@ int vmw_execbuf_process(struct drm_file *file_priv, sw_context->res_ht_initialized = true; } INIT_LIST_HEAD(&sw_context->staged_cmd_res); - INIT_LIST_HEAD(&resource_list); ret = vmw_cmd_check_all(dev_priv, sw_context, kernel_commands, command_size); @@ -2502,14 +2635,6 @@ int vmw_execbuf_process(struct drm_file *file_priv, if (unlikely(ret != 0)) goto out_err; - if (throttle_us) { - ret = vmw_wait_lag(dev_priv, &dev_priv->fifo.marker_queue, - throttle_us); - - if (unlikely(ret != 0)) - goto out_err; - } - ret = mutex_lock_interruptible(&dev_priv->binding_mutex); if (unlikely(ret != 0)) { ret = -ERESTARTSYS; @@ -2522,20 +2647,16 @@ int vmw_execbuf_process(struct drm_file *file_priv, goto out_unlock_binding; } - cmd = vmw_fifo_reserve(dev_priv, command_size); - if (unlikely(cmd == NULL)) { - DRM_ERROR("Failed reserving fifo space for commands.\n"); - ret = -ENOMEM; - goto out_unlock_binding; + if (!header) { + ret = vmw_execbuf_submit_fifo(dev_priv, kernel_commands, + command_size, sw_context); + } else { + ret = vmw_execbuf_submit_cmdbuf(dev_priv, header, command_size, + sw_context); + header = NULL; } - - vmw_apply_relocations(sw_context); - memcpy(cmd, kernel_commands, command_size); - - vmw_resource_relocations_apply(cmd, &sw_context->res_relocations); - 
vmw_resource_relocations_free(&sw_context->res_relocations); - - vmw_fifo_commit(dev_priv, command_size); + if (ret) + goto out_unlock_binding; vmw_query_bo_switch_commit(dev_priv, sw_context); ret = vmw_execbuf_fence_commands(file_priv, dev_priv, @@ -2610,6 +2731,9 @@ out_unlock: vmw_resource_list_unreference(&resource_list); if (unlikely(error_resource != NULL)) vmw_resource_unreference(&error_resource); +out_free_header: + if (header) + vmw_cmdbuf_header_free(header); return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index ecdc8d99f2fb..d0a3bcf5c0d2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -257,6 +257,7 @@ static void vmw_fb_dirty_flush(struct vmw_fb_par *par) cmd->body.width = cpu_to_le32(w); cmd->body.height = cpu_to_le32(h); vmw_fifo_commit(vmw_priv, sizeof(*cmd)); + vmw_fifo_flush(vmw_priv, false); } static void vmw_fb_dirty_mark(struct vmw_fb_par *par, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c index cd5d9f3fe0e0..189102d0ac8b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c @@ -310,7 +310,8 @@ static int vmw_fifo_wait(struct vmw_private *dev_priv, * Returns: * Pointer to the fifo, or null on error (possible hardware hang). */ -void *vmw_fifo_reserve(struct vmw_private *dev_priv, uint32_t bytes) +static void *vmw_local_fifo_reserve(struct vmw_private *dev_priv, + uint32_t bytes) { struct vmw_fifo_state *fifo_state = &dev_priv->fifo; __le32 __iomem *fifo_mem = dev_priv->mmio_virt; @@ -389,9 +390,29 @@ void *vmw_fifo_reserve(struct vmw_private *dev_priv, uint32_t bytes) out_err: fifo_state->reserved_size = 0; mutex_unlock(&fifo_state->fifo_mutex); + return NULL; } +void *vmw_fifo_reserve(struct vmw_private *dev_priv, uint32_t bytes) +{ + void *ret; + + if (dev_priv->cman) + ret = vmw_cmdbuf_reserve(dev_priv->cman, bytes, + SVGA3D_INVALID_ID, false, NULL); + else + ret = vmw_local_fifo_reserve(dev_priv, bytes); + if (IS_ERR_OR_NULL(ret)) { + DRM_ERROR("Fifo reserve failure of %u bytes.\n", + (unsigned) bytes); + dump_stack(); + return NULL; + } + + return ret; +} + static void vmw_fifo_res_copy(struct vmw_fifo_state *fifo_state, __le32 __iomem *fifo_mem, uint32_t next_cmd, @@ -434,7 +455,7 @@ static void vmw_fifo_slow_copy(struct vmw_fifo_state *fifo_state, } } -void vmw_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes) +void vmw_local_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes) { struct vmw_fifo_state *fifo_state = &dev_priv->fifo; __le32 __iomem *fifo_mem = dev_priv->mmio_virt; @@ -480,6 +501,46 @@ void vmw_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes) mutex_unlock(&fifo_state->fifo_mutex); } +void vmw_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes) +{ + if (dev_priv->cman) + vmw_cmdbuf_commit(dev_priv->cman, bytes, NULL, false); + else + vmw_local_fifo_commit(dev_priv, bytes); +} + + +/** + * vmw_fifo_commit_flush - Commit fifo space and flush any buffered commands. + * + * @dev_priv: Pointer to device private structure. + * @bytes: Number of bytes to commit. + */ +static void vmw_fifo_commit_flush(struct vmw_private *dev_priv, uint32_t bytes) +{ + if (dev_priv->cman) + vmw_cmdbuf_commit(dev_priv->cman, bytes, NULL, true); + else + vmw_local_fifo_commit(dev_priv, bytes); +} + +/** + * vmw_fifo_flush - Flush any buffered commands and make sure command processing + * starts. + * + * @dev_priv: Pointer to device private structure. 
+ * @interruptible: Whether to wait interruptible if function needs to sleep. + */ +int vmw_fifo_flush(struct vmw_private *dev_priv, bool interruptible) +{ + might_sleep(); + + if (dev_priv->cman) + return vmw_cmdbuf_cur_flush(dev_priv->cman, interruptible); + else + return 0; +} + int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *seqno) { struct vmw_fifo_state *fifo_state = &dev_priv->fifo; @@ -517,7 +578,7 @@ int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *seqno) ((unsigned long)fm + sizeof(__le32)); iowrite32(*seqno, &cmd_fence->fence); - vmw_fifo_commit(dev_priv, bytes); + vmw_fifo_commit_flush(dev_priv, bytes); (void) vmw_marker_push(&fifo_state->marker_queue, *seqno); vmw_update_seqno(dev_priv, fifo_state); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c index 9fe9827ee499..87964bb0704e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c @@ -56,6 +56,9 @@ irqreturn_t vmw_irq_handler(int irq, void *arg) if (masked_status & SVGA_IRQFLAG_FIFO_PROGRESS) wake_up_all(&dev_priv->fifo_queue); + if (masked_status & (SVGA_IRQFLAG_COMMAND_BUFFER | + SVGA_IRQFLAG_ERROR)) + vmw_cmdbuf_tasklet_schedule(dev_priv->cman); return IRQ_HANDLED; } @@ -131,8 +134,16 @@ int vmw_fallback_wait(struct vmw_private *dev_priv, * Block command submission while waiting for idle. */ - if (fifo_idle) + if (fifo_idle) { down_read(&fifo_state->rwsem); + if (dev_priv->cman) { + ret = vmw_cmdbuf_idle(dev_priv->cman, interruptible, + 10*HZ); + if (ret) + goto out_err; + } + } + signal_seq = atomic_read(&dev_priv->marker_seq); ret = 0; @@ -171,6 +182,7 @@ int vmw_fallback_wait(struct vmw_private *dev_priv, iowrite32(signal_seq, fifo_mem + SVGA_FIFO_FENCE); } wake_up_all(&dev_priv->fence_queue); +out_err: if (fifo_idle) up_read(&fifo_state->rwsem); @@ -315,3 +327,30 @@ void vmw_irq_uninstall(struct drm_device *dev) status = inl(dev_priv->io_start + VMWGFX_IRQSTATUS_PORT); outl(status, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT); } + +void vmw_generic_waiter_add(struct vmw_private *dev_priv, + u32 flag, int *waiter_count) +{ + unsigned long irq_flags; + + spin_lock_irqsave(&dev_priv->irq_lock, irq_flags); + if ((*waiter_count)++ == 0) { + outl(flag, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT); + dev_priv->irq_mask |= flag; + vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask); + } + spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags); +} + +void vmw_generic_waiter_remove(struct vmw_private *dev_priv, + u32 flag, int *waiter_count) +{ + unsigned long irq_flags; + + spin_lock_irqsave(&dev_priv->irq_lock, irq_flags); + if (--(*waiter_count) == 0) { + dev_priv->irq_mask &= ~flag; + vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask); + } + spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags); +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 07cda8cbbddb..b5632c25d94e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -631,6 +631,7 @@ static int vmw_framebuffer_surface_dirty(struct drm_framebuffer *framebuffer, flags, color, clips, num_clips, inc, NULL); + vmw_fifo_flush(dev_priv, false); ttm_read_unlock(&dev_priv->reservation_sem); drm_modeset_unlock_all(dev_priv->dev); @@ -987,6 +988,7 @@ static int vmw_framebuffer_dmabuf_dirty(struct drm_framebuffer *framebuffer, clips, num_clips, increment, NULL); } + vmw_fifo_flush(dev_priv, false); ttm_read_unlock(&dev_priv->reservation_sem); 
drm_modeset_unlock_all(dev_priv->dev); @@ -1347,6 +1349,8 @@ int vmw_kms_present(struct vmw_private *dev_priv, break; } + vmw_fifo_flush(dev_priv, false); + kfree(cmd); out_free_tmp: kfree(tmp); -- 2.11.0