drm/i915: Convert hangcheck from a timer into a delayed work item

author Chris Wilson <chris@chris-wilson.co.uk>

Mon, 26 Jan 2015 16:03:03 +0000 (18:03 +0200)

committer Daniel Vetter <daniel.vetter@ffwll.ch>

Wed, 28 Jan 2015 16:22:12 +0000 (17:22 +0100)
author Chris Wilson <chris@chris-wilson.co.uk>
Mon, 26 Jan 2015 16:03:03 +0000 (18:03 +0200)
committer Daniel Vetter <daniel.vetter@ffwll.ch>
Wed, 28 Jan 2015 16:22:12 +0000 (17:22 +0100)
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c

index 51e8fe5..6eaf795 100644 (file)
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -790,6 +790,14 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
                 goto out_freewq;
         }
  
+       dev_priv->gpu_error.hangcheck_wq =
+               alloc_ordered_workqueue("i915-hangcheck", 0);
+       if (dev_priv->gpu_error.hangcheck_wq == NULL) {
+               DRM_ERROR("Failed to create our hangcheck workqueue.\n");
+               ret = -ENOMEM;
+               goto out_freedpwq;
+       }
+
         intel_irq_init(dev_priv);
         intel_uncore_sanitize(dev);
  
@@ -864,6 +872,8 @@ out_gem_unload:
         intel_teardown_gmbus(dev);
         intel_teardown_mchbar(dev);
         pm_qos_remove_request(&dev_priv->pm_qos);
+       destroy_workqueue(dev_priv->gpu_error.hangcheck_wq);
+out_freedpwq:
         destroy_workqueue(dev_priv->dp_wq);
  out_freewq:
         destroy_workqueue(dev_priv->wq);
@@ -934,7 +944,7 @@ int i915_driver_unload(struct drm_device *dev)
         }
  
         /* Free error state after interrupts are fully disabled. */
-       del_timer_sync(&dev_priv->gpu_error.hangcheck_timer);
+       cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
         cancel_work_sync(&dev_priv->gpu_error.work);
         i915_destroy_error_state(dev);
  
@@ -960,6 +970,7 @@ int i915_driver_unload(struct drm_device *dev)
  
         destroy_workqueue(dev_priv->dp_wq);
         destroy_workqueue(dev_priv->wq);
+       destroy_workqueue(dev_priv->gpu_error.hangcheck_wq);
         pm_qos_remove_request(&dev_priv->pm_qos);
  
         i915_global_gtt_cleanup(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c

index 9da4e60..5f50e70 100644 (file)
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1402,7 +1402,7 @@ static int intel_runtime_suspend(struct device *device)
                 return ret;
         }
  
-       del_timer_sync(&dev_priv->gpu_error.hangcheck_timer);
+       cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
         intel_uncore_forcewake_reset(dev, false);
         dev_priv->pm.suspended = true;
  
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h

index 760d239..7aee7d5 100644 (file)
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1345,7 +1345,8 @@ struct i915_gpu_error {
         /* Hang gpu twice in this window and your context gets banned */
  #define DRM_I915_CTX_BAN_PERIOD DIV_ROUND_UP(8*DRM_I915_HANGCHECK_PERIOD, 1000)
  
-       struct timer_list hangcheck_timer;
+       struct workqueue_struct *hangcheck_wq;
+       struct delayed_work hangcheck_work;
  
         /* For reset and error_state handling. */
         spinlock_t lock;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c

index 9c7c95a..361d18b 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4608,7 +4608,7 @@ i915_gem_suspend(struct drm_device *dev)
         i915_gem_stop_ringbuffers(dev);
         mutex_unlock(&dev->struct_mutex);
  
-       del_timer_sync(&dev_priv->gpu_error.hangcheck_timer);
+       cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
         cancel_delayed_work_sync(&dev_priv->mm.retire_work);
         flush_delayed_work(&dev_priv->mm.idle_work);
  
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c

index 2399eae..23bfe22 100644 (file)
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2974,7 +2974,7 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd)
         return HANGCHECK_HUNG;
  }
  
-/**
+/*
   * This is called when the chip hasn't reported back with completed
   * batchbuffers in a long time. We keep track per ring seqno progress and
   * if there are no progress, hangcheck score for that ring is increased.
@@ -2982,10 +2982,12 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd)
   * we kick the ring. If we see no progress on three subsequent calls
   * we assume chip is wedged and try to fix it by resetting the chip.
   */
-static void i915_hangcheck_elapsed(unsigned long data)
+static void i915_hangcheck_elapsed(struct work_struct *work)
  {
-       struct drm_device *dev = (struct drm_device *)data;
-       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct drm_i915_private *dev_priv =
+               container_of(work, typeof(*dev_priv),
+                            gpu_error.hangcheck_work.work);
+       struct drm_device *dev = dev_priv->dev;
         struct intel_engine_cs *ring;
         int i;
         int busy_count = 0, rings_hung = 0;
@@ -3099,17 +3101,18 @@ static void i915_hangcheck_elapsed(unsigned long data)
  
  void i915_queue_hangcheck(struct drm_device *dev)
  {
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       struct timer_list *timer = &dev_priv->gpu_error.hangcheck_timer;
+       struct i915_gpu_error *e = &to_i915(dev)->gpu_error;
  
         if (!i915.enable_hangcheck)
                 return;
  
-       /* Don't continually defer the hangcheck, but make sure it is active */
-       if (timer_pending(timer))
-               return;
-       mod_timer(timer,
-                 round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
+       /* queue_delayed_work() is a no-op while the work is pending, so a
+        * queued hangcheck is never deferred and always runs after work is
+        * submitted to any ring; else a busy ring would hide a hung one.
+        */
+
+       queue_delayed_work(e->hangcheck_wq, &e->hangcheck_work,
+                          round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES));
  }
  
  static void ibx_irq_reset(struct drm_device *dev)
@@ -4353,9 +4356,8 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
         else
                 dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
  
-       setup_timer(&dev_priv->gpu_error.hangcheck_timer,
-                   i915_hangcheck_elapsed,
-                   (unsigned long) dev);
+       INIT_DELAYED_WORK(&dev_priv->gpu_error.hangcheck_work,
+                         i915_hangcheck_elapsed);
         INIT_DELAYED_WORK(&dev_priv->hotplug_reenable_work,
                           intel_hpd_irq_reenable_work);
author	Chris Wilson <chris@chris-wilson.co.uk>
	Mon, 26 Jan 2015 16:03:03 +0000 (18:03 +0200)
committer	Daniel Vetter <daniel.vetter@ffwll.ch>
	Wed, 28 Jan 2015 16:22:12 +0000 (17:22 +0100)
drivers/gpu/drm/i915/i915_dma.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_drv.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_drv.h		patch \| blob \| history
drivers/gpu/drm/i915/i915_gem.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_irq.c		patch \| blob \| history