drm/i915: Restore engine->submit_request before unwedging

author Chris Wilson <chris@chris-wilson.co.uk>

Thu, 16 Mar 2017 17:13:04 +0000 (17:13 +0000)

committer Chris Wilson <chris@chris-wilson.co.uk>

Thu, 16 Mar 2017 17:17:14 +0000 (17:17 +0000)
author Chris Wilson <chris@chris-wilson.co.uk>
Thu, 16 Mar 2017 17:13:04 +0000 (17:13 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Thu, 16 Mar 2017 17:17:14 +0000 (17:17 +0000)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c

index be3c812..03d9e45 100644 (file)
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1821,7 +1821,9 @@ void i915_reset(struct drm_i915_private *dev_priv)
                 return;
  
         /* Clear any previous failed attempts at recovery. Time to try again. */
-       __clear_bit(I915_WEDGED, &error->flags);
+       if (!i915_gem_unset_wedged(dev_priv))
+               goto wakeup;
+
         error->reset_count++;
  
         pr_notice("drm/i915: Resetting chip after gpu hang\n");
@@ -1867,17 +1869,18 @@ void i915_reset(struct drm_i915_private *dev_priv)
  
         i915_queue_hangcheck(dev_priv);
  
-wakeup:
+finish:
         i915_gem_reset_finish(dev_priv);
         enable_irq(dev_priv->drm.irq);
  
+wakeup:
         clear_bit(I915_RESET_HANDOFF, &error->flags);
         wake_up_bit(&error->flags, I915_RESET_HANDOFF);
         return;
  
  error:
         i915_gem_set_wedged(dev_priv);
-       goto wakeup;
+       goto finish;
  }
  
  static int i915_pm_suspend(struct device *kdev)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h

index fcc8184..2f28d3c 100644 (file)
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3441,6 +3441,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
  void i915_gem_reset(struct drm_i915_private *dev_priv);
  void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
  void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
+bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv);
  
  void i915_gem_init_mmio(struct drm_i915_private *i915);
  int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c

index ecd1b03..fd611b4 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2997,6 +2997,65 @@ void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
         mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
  }
  
+bool i915_gem_unset_wedged(struct drm_i915_private *i915)
+{
+       struct i915_gem_timeline *tl;
+       int i;
+
+       lockdep_assert_held(&i915->drm.struct_mutex);
+       if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
+               return true;
+
+       /* Before unwedging, make sure that all pending operations
+        * are flushed and errored out - we may have requests waiting upon
+        * third party fences. We marked all inflight requests as EIO, and
+        * every execbuf since returned EIO, for consistency we want all
+        * the currently pending requests to also be marked as EIO, which
+        * is done inside our nop_submit_request - and so we must wait.
+        *
+        * No more can be submitted until we reset the wedged bit.
+        */
+       list_for_each_entry(tl, &i915->gt.timelines, link) {
+               for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
+                       struct drm_i915_gem_request *rq;
+
+                       rq = i915_gem_active_peek(&tl->engine[i].last_request,
+                                                 &i915->drm.struct_mutex);
+                       if (!rq)
+                               continue;
+
+                       /* We can't use our normal waiter as we want to
+                        * avoid recursively trying to handle the current
+                        * reset. The basic dma_fence_default_wait() installs
+                        * a callback for dma_fence_signal(), which is
+                        * triggered by our nop handler (indirectly, the
+                        * callback enables the signaler thread which is
+                        * woken by the nop_submit_request() advancing the seqno
+                        * and when the seqno passes the fence, the signaler
+                        * then signals the fence waking us up).
+                        */
+                       if (dma_fence_default_wait(&rq->fence, true,
+                                                  MAX_SCHEDULE_TIMEOUT) < 0)
+                               return false;
+               }
+       }
+
+       /* Undo nop_submit_request. We prevent all new i915 requests from
+        * being queued (by disallowing execbuf whilst wedged) so having
+        * waited for all active requests above, we know the system is idle
+        * and do not have to worry about a thread being inside
+        * engine->submit_request() as we swap over. So unlike installing
+        * the nop_submit_request on reset, we can do this from normal
+        * context and do not require stop_machine().
+        */
+       intel_engines_reset_default_submission(i915);
+
+       smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
+       clear_bit(I915_WEDGED, &i915->gpu_error.flags);
+
+       return true;
+}
+
  static void
  i915_gem_retire_work_handler(struct work_struct *work)
  {
author	Chris Wilson <chris@chris-wilson.co.uk>
	Thu, 16 Mar 2017 17:13:04 +0000 (17:13 +0000)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Thu, 16 Mar 2017 17:17:14 +0000 (17:17 +0000)
drivers/gpu/drm/i915/i915_drv.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_drv.h		patch \| blob \| history
drivers/gpu/drm/i915/i915_gem.c		patch \| blob \| history