
drm/i915: Apply i915_request_skip() on submission
author     Chris Wilson <chris@chris-wilson.co.uk>
           Wed, 4 Mar 2020 12:18:48 +0000 (12:18 +0000)
committer  Chris Wilson <chris@chris-wilson.co.uk>
           Wed, 4 Mar 2020 14:29:50 +0000 (14:29 +0000)
Trying to use i915_request_skip() prior to i915_request_add() causes us
to try to fill the ring up to request->postfix, which has not yet been
set, and so may cause us to memset() past the end of the ring.

Instead of skipping the request immediately, just flag the error on the
request (only accepting the first fatal error we see) and then clear the
request upon submission.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200304121849.2448028-1-chris@chris-wilson.co.uk
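
[Editor's note: a rough sketch of the error-latching scheme this patch
introduces. The real implementation is i915_request_set_error_once() in
the i915_request.c hunk below; the "only the first fatal error wins" rule
comes down to a compare-and-swap loop. The struct request and helper names
here are simplified stand-ins in portable C11, not the driver's types.]

    #include <errno.h>
    #include <stdatomic.h>
    #include <stdbool.h>

    /* Simplified stand-in for the fence error carried by an i915 request. */
    struct request {
            _Atomic int error;
    };

    /* Non-fatal errors must not be latched (mirrors fatal_error() below). */
    static bool is_fatal(int error)
    {
            switch (error) {
            case 0:          /* not an error */
            case -EAGAIN:    /* innocent victim of a reset */
            case -ETIMEDOUT: /* fence timeout */
                    return false;
            default:
                    return true;
            }
    }

    /*
     * Record an error on the request, but only the first fatal one: once
     * a fatal error has been latched, later callers leave it untouched so
     * the original cause of failure is preserved for the fence. (The
     * driver additionally returns early for already-signaled requests.)
     */
    static void request_set_error_once(struct request *rq, int error)
    {
            int old = atomic_load_explicit(&rq->error, memory_order_relaxed);

            do {
                    if (is_fatal(old))
                            return; /* first fatal error wins */
            } while (!atomic_compare_exchange_weak(&rq->error, &old, error));
    }

[At submission time the driver then inspects the latched error and, if it
is fatal, clears the request body down to its breadcrumb via
__i915_request_skip(), as the i915_request.c hunk shows.]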
14 files changed:
drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
drivers/gpu/drm/i915/gt/intel_lrc.c
drivers/gpu/drm/i915/gt/intel_reset.c
drivers/gpu/drm/i915/gt/intel_ring_submission.c
drivers/gpu/drm/i915/gt/mock_engine.c
drivers/gpu/drm/i915/gt/selftest_hangcheck.c
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/i915/i915_request.h
drivers/gpu/drm/i915/selftests/igt_spinner.c

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index 81366aa..0598e53 100644
@@ -217,7 +217,7 @@ static void clear_pages_worker(struct work_struct *work)
                                           0);
 out_request:
        if (unlikely(err)) {
-               i915_request_skip(rq, err);
+               i915_request_set_error_once(rq, err);
                err = 0;
        }
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index a1636c1..7bb27f3 100644
@@ -1169,7 +1169,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
        goto out_pool;
 
 skip_request:
-       i915_request_skip(rq, err);
+       i915_request_set_error_once(rq, err);
 err_request:
        i915_request_add(rq);
 err_unpin:
@@ -1850,7 +1850,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
        return 0;
 
 err_skip:
-       i915_request_skip(eb->request, err);
+       i915_request_set_error_once(eb->request, err);
        return err;
 }
 
@@ -2579,7 +2579,8 @@ static void eb_request_add(struct i915_execbuffer *eb)
                        attr.priority |= I915_PRIORITY_WAIT;
        } else {
                /* Serialise with context_close via the add_to_timeline */
-               i915_request_skip(rq, -ENOENT);
+               i915_request_set_error_once(rq, -ENOENT);
+               __i915_request_skip(rq);
        }
 
        local_bh_disable();
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
index 70809d8..39b8a05 100644
@@ -186,7 +186,7 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
                                        0);
 out_request:
        if (unlikely(err))
-               i915_request_skip(rq, err);
+               i915_request_set_error_once(rq, err);
 
        i915_request_add(rq);
 out_batch:
@@ -385,7 +385,7 @@ out_unlock:
        drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
 out_request:
        if (unlikely(err))
-               i915_request_skip(rq, err);
+               i915_request_set_error_once(rq, err);
 
        i915_request_add(rq);
 out_batch:
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 375d864..77c7e65 100644
@@ -1004,7 +1004,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
        return 0;
 
 skip_request:
-       i915_request_skip(rq, err);
+       i915_request_set_error_once(rq, err);
 err_request:
        i915_request_add(rq);
 err_batch:
@@ -1559,7 +1559,7 @@ static int write_to_scratch(struct i915_gem_context *ctx,
 
        goto out_vm;
 skip_request:
-       i915_request_skip(rq, err);
+       i915_request_set_error_once(rq, err);
 err_request:
        i915_request_add(rq);
 err_unpin:
@@ -1708,7 +1708,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
 
        goto out_vm;
 skip_request:
-       i915_request_skip(rq, err);
+       i915_request_set_error_once(rq, err);
 err_request:
        i915_request_add(rq);
 err_unpin:
diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
index 6718da2..772d8cb 100644
@@ -159,7 +159,7 @@ int igt_gpu_fill_dw(struct intel_context *ce,
        return 0;
 
 skip_request:
-       i915_request_skip(rq, err);
+       i915_request_set_error_once(rq, err);
 err_request:
        i915_request_add(rq);
 err_batch:
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index b9b3f78..d123dd7 100644
@@ -245,7 +245,7 @@ static void mark_eio(struct i915_request *rq)
 
        GEM_BUG_ON(i915_request_signaled(rq));
 
-       dma_fence_set_error(&rq->fence, -EIO);
+       i915_request_set_error_once(rq, -EIO);
        i915_request_mark_complete(rq);
 }
 
@@ -4903,7 +4903,7 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
        mask = rq->execution_mask;
        if (unlikely(!mask)) {
                /* Invalid selection, submit to a random engine in error */
-               i915_request_skip(rq, -ENODEV);
+               i915_request_set_error_once(rq, -ENODEV);
                mask = ve->siblings[0]->mask;
        }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index aef6ab5..10ad816 100644
@@ -48,8 +48,10 @@ static void engine_skip_context(struct i915_request *rq)
 
        lockdep_assert_held(&engine->active.lock);
        list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
-               if (rq->context == hung_ctx)
-                       i915_request_skip(rq, -EIO);
+               if (rq->context == hung_ctx) {
+                       i915_request_set_error_once(rq, -EIO);
+                       __i915_request_skip(rq);
+               }
 }
 
 static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
@@ -154,11 +156,12 @@ void __i915_request_reset(struct i915_request *rq, bool guilty)
 
        rcu_read_lock(); /* protect the GEM context */
        if (guilty) {
-               i915_request_skip(rq, -EIO);
+               i915_request_set_error_once(rq, -EIO);
+               __i915_request_skip(rq);
                if (mark_guilty(rq))
                        engine_skip_context(rq);
        } else {
-               dma_fence_set_error(&rq->fence, -EAGAIN);
+               i915_request_set_error_once(rq, -EAGAIN);
                mark_innocent(rq);
        }
        rcu_read_unlock();
@@ -785,7 +788,7 @@ static void nop_submit_request(struct i915_request *request)
        unsigned long flags;
 
        RQ_TRACE(request, "-EIO\n");
-       dma_fence_set_error(&request->fence, -EIO);
+       i915_request_set_error_once(request, -EIO);
 
        spin_lock_irqsave(&engine->active.lock, flags);
        __i915_request_submit(request);
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index fee7436..ee241b7 100644
@@ -895,9 +895,7 @@ static void reset_cancel(struct intel_engine_cs *engine)
 
        /* Mark all submitted requests as skipped. */
        list_for_each_entry(request, &engine->active.requests, sched.link) {
-               if (!i915_request_signaled(request))
-                       dma_fence_set_error(&request->fence, -EIO);
-
+               i915_request_set_error_once(request, -EIO);
                i915_request_mark_complete(request);
        }
 
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 5633515..4a53ded 100644
@@ -244,9 +244,7 @@ static void mock_reset_cancel(struct intel_engine_cs *engine)
 
        /* Mark all submitted requests as skipped. */
        list_for_each_entry(request, &engine->active.requests, sched.link) {
-               if (!i915_request_signaled(request))
-                       dma_fence_set_error(&request->fence, -EIO);
-
+               i915_request_set_error_once(request, -EIO);
                i915_request_mark_complete(request);
        }
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index c3514ec..2b2efff 100644
@@ -268,7 +268,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
 
 cancel_rq:
        if (err) {
-               i915_request_skip(rq, err);
+               i915_request_set_error_once(rq, err);
                i915_request_add(rq);
        }
 unpin_hws:
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 1beaa77..fe7778c 100644
@@ -456,9 +456,7 @@ static void guc_reset_cancel(struct intel_engine_cs *engine)
 
        /* Mark all executing requests as skipped. */
        list_for_each_entry(rq, &engine->active.requests, sched.link) {
-               if (!i915_request_signaled(rq))
-                       dma_fence_set_error(&rq->fence, -EIO);
-
+               i915_request_set_error_once(rq, -EIO);
                i915_request_mark_complete(rq);
        }
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index d837c13..4bfe68e 100644
@@ -363,6 +363,50 @@ __await_execution(struct i915_request *rq,
        return 0;
 }
 
+static bool fatal_error(int error)
+{
+       switch (error) {
+       case 0: /* not an error! */
+       case -EAGAIN: /* innocent victim of a GT reset (__i915_request_reset) */
+       case -ETIMEDOUT: /* waiting for Godot (timer_i915_sw_fence_wake) */
+               return false;
+       default:
+               return true;
+       }
+}
+
+void __i915_request_skip(struct i915_request *rq)
+{
+       GEM_BUG_ON(!fatal_error(rq->fence.error));
+
+       if (rq->infix == rq->postfix)
+               return;
+
+       /*
+        * As this request likely depends on state from the lost
+        * context, clear out all the user operations leaving the
+        * breadcrumb at the end (so we get the fence notifications).
+        */
+       __i915_request_fill(rq, 0);
+       rq->infix = rq->postfix;
+}
+
+void i915_request_set_error_once(struct i915_request *rq, int error)
+{
+       int old;
+
+       GEM_BUG_ON(!IS_ERR_VALUE((long)error));
+
+       if (i915_request_signaled(rq))
+               return;
+
+       old = READ_ONCE(rq->fence.error);
+       do {
+               if (fatal_error(old))
+                       return;
+       } while (!try_cmpxchg(&rq->fence.error, &old, error));
+}
+
 bool __i915_request_submit(struct i915_request *request)
 {
        struct intel_engine_cs *engine = request->engine;
@@ -392,8 +436,10 @@ bool __i915_request_submit(struct i915_request *request)
        if (i915_request_completed(request))
                goto xfer;
 
-       if (intel_context_is_banned(request->context))
-               i915_request_skip(request, -EIO);
+       if (unlikely(intel_context_is_banned(request->context)))
+               i915_request_set_error_once(request, -EIO);
+       if (unlikely(fatal_error(request->fence.error)))
+               __i915_request_skip(request);
 
        /*
         * Are we using semaphores when the gpu is already saturated?
@@ -519,7 +565,7 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
                trace_i915_request_submit(request);
 
                if (unlikely(fence->error))
-                       i915_request_skip(request, fence->error);
+                       i915_request_set_error_once(request, fence->error);
 
                /*
                 * We need to serialize use of the submit_request() callback
@@ -1209,23 +1255,6 @@ i915_request_await_object(struct i915_request *to,
        return ret;
 }
 
-void i915_request_skip(struct i915_request *rq, int error)
-{
-       GEM_BUG_ON(!IS_ERR_VALUE((long)error));
-       dma_fence_set_error(&rq->fence, error);
-
-       if (rq->infix == rq->postfix)
-               return;
-
-       /*
-        * As this request likely depends on state from the lost
-        * context, clear out all the user operations leaving the
-        * breadcrumb at the end (so we get the fence notifications).
-        */
-       __i915_request_fill(rq, 0);
-       rq->infix = rq->postfix;
-}
-
 static struct i915_request *
 __i915_request_add_to_timeline(struct i915_request *rq)
 {
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index da8420f..d4bae16 100644
@@ -303,6 +303,9 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp);
 struct i915_request * __must_check
 i915_request_create(struct intel_context *ce);
 
+void i915_request_set_error_once(struct i915_request *rq, int error);
+void __i915_request_skip(struct i915_request *rq);
+
 struct i915_request *__i915_request_commit(struct i915_request *request);
 void __i915_request_queue(struct i915_request *rq,
                          const struct i915_sched_attr *attr);
@@ -352,8 +355,6 @@ void i915_request_add(struct i915_request *rq);
 bool __i915_request_submit(struct i915_request *request);
 void i915_request_submit(struct i915_request *request);
 
-void i915_request_skip(struct i915_request *request, int error);
-
 void __i915_request_unsubmit(struct i915_request *request);
 void i915_request_unsubmit(struct i915_request *request);
 
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c
index e8a58fe..9ad4ab0 100644
@@ -183,7 +183,7 @@ igt_spinner_create_request(struct igt_spinner *spin,
 
 cancel_rq:
        if (err) {
-               i915_request_skip(rq, err);
+               i915_request_set_error_once(rq, err);
                i915_request_add(rq);
        }
 unpin_hws: