From b0a5303d4e140ed8e534e44b278ca5d07401851d Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 5 Aug 2021 12:46:48 +0200 Subject: [PATCH] drm/sched: Barriers are needed for entity->last_scheduled MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit It might be good enough on x86 with just READ_ONCE, but the write side should then at least be WRITE_ONCE because x86 has total store order. It's definitely not enough on arm. Fix this properly, which means - explain the need for the barrier in both places - point at the other side in each comment Also pull out the !sched_list case as the first check, so that the code flow is clearer. While at it sprinkle some comments around because it was very non-obvious to me what's actually going on here and why. Note that we really need full barriers here, at first I thought store-release and load-acquire on ->last_scheduled would be enough, but we actually require ordering between that and the queue state. v2: Put smp_rmb() in the right place and fix up comment (Andrey) Reviewed-by: Christian König Acked-by: Melissa Wen Signed-off-by: Daniel Vetter Cc: "Christian König" Cc: Steven Price Cc: Daniel Vetter Cc: Andrey Grodzovsky Cc: Lee Jones Cc: Boris Brezillon Link: https://patchwork.freedesktop.org/patch/msgid/20210805104705.862416-4-daniel.vetter@ffwll.ch --- drivers/gpu/drm/scheduler/sched_entity.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index f7347c284886..89e3f6eaf519 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -439,8 +439,16 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) dma_fence_set_error(&sched_job->s_fence->finished, -ECANCELED); dma_fence_put(entity->last_scheduled); + entity->last_scheduled = dma_fence_get(&sched_job->s_fence->finished); + /* + 
* If the queue is empty we allow drm_sched_entity_select_rq() to + * locklessly access ->last_scheduled. This only works if we set the + * pointer before we dequeue and if we have a write barrier here. + */ + smp_wmb(); + spsc_queue_pop(&entity->job_queue); return sched_job; } @@ -459,10 +467,25 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity) struct drm_gpu_scheduler *sched; struct drm_sched_rq *rq; - if (spsc_queue_count(&entity->job_queue) || !entity->sched_list) + /* single possible engine and already selected */ + if (!entity->sched_list) + return; + + /* queue non-empty, stay on the same engine */ + if (spsc_queue_count(&entity->job_queue)) return; - fence = READ_ONCE(entity->last_scheduled); + /* + * Only when the queue is empty are we guaranteed that the scheduler + * thread cannot change ->last_scheduled. To enforce ordering we need + * a read barrier here. See drm_sched_entity_pop_job() for the other + * side. + */ + smp_rmb(); + + fence = entity->last_scheduled; + + /* stay on the same engine if the previous job hasn't finished */ if (fence && !dma_fence_is_signaled(fence)) return; -- 2.11.0