From f7fe64ad0f22ff034f8ebcfbd7299ee9cc9b57d7 Mon Sep 17 00:00:00 2001
From: Matthew Brost
Date: Mon, 30 Oct 2023 20:24:37 -0700
Subject: [PATCH] drm/sched: Split free_job into own work item

Rather than calling free_job and run_job in the same work item, have a
dedicated work item for each. This aligns with the design and intended
use of work queues.

v2:
 - Test for DMA_FENCE_FLAG_TIMESTAMP_BIT before setting timestamp in
   free_job() work item (Danilo)
v3:
 - Drop forward dec of drm_sched_select_entity (Boris)
 - Return in drm_sched_run_job_work if entity NULL (Boris)
v4:
 - Replace dequeue with peek and invert logic (Luben)
 - Wrap to 100 lines (Luben)
 - Update comments for *_queue / *_queue_if_ready functions (Luben)
v5:
 - Drop peek argument, blindly reinit idle (Luben)
 - s/drm_sched_free_job_queue_if_ready/drm_sched_free_job_queue_if_done
   (Luben)
 - Update work_run_job & work_free_job kernel doc (Luben)
v6:
 - Do not move drm_sched_select_entity in file (Luben)

Signed-off-by: Matthew Brost
Link: https://lore.kernel.org/r/20231031032439.1558703-4-matthew.brost@intel.com
Reviewed-by: Luben Tuikov
Signed-off-by: Luben Tuikov
---
 drivers/gpu/drm/scheduler/sched_main.c | 156 ++++++++++++++++---------
 include/drm/gpu_scheduler.h            |   4 +-
 2 files changed, 106 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index d1ae05bded15..3b1b2f8eafe8 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -265,6 +265,32 @@ static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched)
 		queue_work(sched->submit_wq, &sched->work_run_job);
 }
 
+/**
+ * drm_sched_free_job_queue - enqueue free-job work
+ * @sched: scheduler instance
+ */
+static void drm_sched_free_job_queue(struct drm_gpu_scheduler *sched)
+{
+	if (!READ_ONCE(sched->pause_submit))
+		queue_work(sched->submit_wq, &sched->work_free_job);
+}
+
+/**
+ * drm_sched_free_job_queue_if_done - enqueue free-job work if ready
+ * @sched: scheduler instance
+ */
+static void drm_sched_free_job_queue_if_done(struct drm_gpu_scheduler *sched)
+{
+	struct drm_sched_job *job;
+
+	spin_lock(&sched->job_list_lock);
+	job = list_first_entry_or_null(&sched->pending_list,
+				       struct drm_sched_job, list);
+	if (job && dma_fence_is_signaled(&job->s_fence->finished))
+		drm_sched_free_job_queue(sched);
+	spin_unlock(&sched->job_list_lock);
+}
+
 /**
  * drm_sched_job_done - complete a job
  * @s_job: pointer to the job which is done
@@ -284,7 +310,7 @@ static void drm_sched_job_done(struct drm_sched_job *s_job, int result)
 	dma_fence_get(&s_fence->finished);
 	drm_sched_fence_finished(s_fence, result);
 	dma_fence_put(&s_fence->finished);
-	drm_sched_run_job_queue(sched);
+	drm_sched_free_job_queue(sched);
 }
 
 /**
@@ -943,8 +969,10 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
 						typeof(*next), list);
 
 		if (next) {
-			next->s_fence->scheduled.timestamp =
-				dma_fence_timestamp(&job->s_fence->finished);
+			if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT,
+				     &next->s_fence->scheduled.flags))
+				next->s_fence->scheduled.timestamp =
+					dma_fence_timestamp(&job->s_fence->finished);
 			/* start TO timer for next job */
 			drm_sched_start_timeout(sched);
 		}
@@ -994,7 +1022,40 @@ drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
 EXPORT_SYMBOL(drm_sched_pick_best);
 
 /**
- * drm_sched_run_job_work - main scheduler thread
+ * drm_sched_run_job_queue_if_ready - enqueue run-job work if ready
+ * @sched: scheduler instance
+ */
+static void drm_sched_run_job_queue_if_ready(struct drm_gpu_scheduler *sched)
+{
+	if (drm_sched_select_entity(sched))
+		drm_sched_run_job_queue(sched);
+}
+
+/**
+ * drm_sched_free_job_work - worker to call free_job
+ *
+ * @w: free job work
+ */
+static void drm_sched_free_job_work(struct work_struct *w)
+{
+	struct drm_gpu_scheduler *sched =
+		container_of(w, struct drm_gpu_scheduler, work_free_job);
+	struct drm_sched_job *cleanup_job;
+
+	if (READ_ONCE(sched->pause_submit))
+		return;
+
+	cleanup_job = drm_sched_get_cleanup_job(sched);
+	if (cleanup_job) {
+		sched->ops->free_job(cleanup_job);
+
+		drm_sched_free_job_queue_if_done(sched);
+		drm_sched_run_job_queue_if_ready(sched);
+	}
+}
+
+/**
+ * drm_sched_run_job_work - worker to call run_job
  *
  * @w: run job work
  */
@@ -1003,65 +1064,51 @@ static void drm_sched_run_job_work(struct work_struct *w)
 	struct drm_gpu_scheduler *sched =
 		container_of(w, struct drm_gpu_scheduler, work_run_job);
 	struct drm_sched_entity *entity;
-	struct drm_sched_job *cleanup_job;
+	struct dma_fence *fence;
+	struct drm_sched_fence *s_fence;
+	struct drm_sched_job *sched_job;
 	int r;
 
 	if (READ_ONCE(sched->pause_submit))
 		return;
 
-	cleanup_job = drm_sched_get_cleanup_job(sched);
 	entity = drm_sched_select_entity(sched);
+	if (!entity)
+		return;
 
-	if (!entity && !cleanup_job)
-		return;	/* No more work */
-
-	if (cleanup_job)
-		sched->ops->free_job(cleanup_job);
-
-	if (entity) {
-		struct dma_fence *fence;
-		struct drm_sched_fence *s_fence;
-		struct drm_sched_job *sched_job;
-
-		sched_job = drm_sched_entity_pop_job(entity);
-		if (!sched_job) {
-			complete_all(&entity->entity_idle);
-			if (!cleanup_job)
-				return;	/* No more work */
-			goto again;
-		}
-
-		s_fence = sched_job->s_fence;
-
-		atomic_inc(&sched->hw_rq_count);
-		drm_sched_job_begin(sched_job);
-
-		trace_drm_run_job(sched_job, entity);
-		fence = sched->ops->run_job(sched_job);
+	sched_job = drm_sched_entity_pop_job(entity);
+	if (!sched_job) {
 		complete_all(&entity->entity_idle);
-		drm_sched_fence_scheduled(s_fence, fence);
-
-		if (!IS_ERR_OR_NULL(fence)) {
-			/* Drop for original kref_init of the fence */
-			dma_fence_put(fence);
-
-			r = dma_fence_add_callback(fence, &sched_job->cb,
-						   drm_sched_job_done_cb);
-			if (r == -ENOENT)
-				drm_sched_job_done(sched_job, fence->error);
-			else if (r)
-				DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
-					      r);
-		} else {
-			drm_sched_job_done(sched_job, IS_ERR(fence) ?
-					   PTR_ERR(fence) : 0);
-		}
-
-		wake_up(&sched->job_scheduled);
+		return;	/* No more work */
 	}
 
-again:
-	drm_sched_run_job_queue(sched);
+	s_fence = sched_job->s_fence;
+
+	atomic_inc(&sched->hw_rq_count);
+	drm_sched_job_begin(sched_job);
+
+	trace_drm_run_job(sched_job, entity);
+	fence = sched->ops->run_job(sched_job);
+	complete_all(&entity->entity_idle);
+	drm_sched_fence_scheduled(s_fence, fence);
+
+	if (!IS_ERR_OR_NULL(fence)) {
+		/* Drop for original kref_init of the fence */
+		dma_fence_put(fence);
+
+		r = dma_fence_add_callback(fence, &sched_job->cb,
+					   drm_sched_job_done_cb);
+		if (r == -ENOENT)
+			drm_sched_job_done(sched_job, fence->error);
+		else if (r)
+			DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r);
+	} else {
+		drm_sched_job_done(sched_job, IS_ERR(fence) ?
+				   PTR_ERR(fence) : 0);
+	}
+
+	wake_up(&sched->job_scheduled);
+	drm_sched_run_job_queue_if_ready(sched);
 }
 
 /**
@@ -1145,6 +1192,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 	atomic_set(&sched->hw_rq_count, 0);
 	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
 	INIT_WORK(&sched->work_run_job, drm_sched_run_job_work);
+	INIT_WORK(&sched->work_free_job, drm_sched_free_job_work);
 	atomic_set(&sched->_score, 0);
 	atomic64_set(&sched->job_id_count, 0);
 	sched->pause_submit = false;
@@ -1274,6 +1322,7 @@ void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched)
 {
 	WRITE_ONCE(sched->pause_submit, true);
 	cancel_work_sync(&sched->work_run_job);
+	cancel_work_sync(&sched->work_free_job);
 }
 EXPORT_SYMBOL(drm_sched_wqueue_stop);
 
@@ -1286,5 +1335,6 @@ void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched)
 {
 	WRITE_ONCE(sched->pause_submit, false);
 	queue_work(sched->submit_wq, &sched->work_run_job);
+	queue_work(sched->submit_wq, &sched->work_free_job);
 }
 EXPORT_SYMBOL(drm_sched_wqueue_start);
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 4bf94aca2631..6655aeeef9bb 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -480,9 +480,10 @@ struct drm_sched_backend_ops {
  *                 finished.
  * @hw_rq_count: the number of jobs currently in the hardware queue.
  * @job_id_count: used to assign unique id to the each job.
- * @submit_wq: workqueue used to queue @work_run_job
+ * @submit_wq: workqueue used to queue @work_run_job and @work_free_job
  * @timeout_wq: workqueue used to queue @work_tdr
  * @work_run_job: work which calls run_job op of each scheduler.
+ * @work_free_job: work which calls free_job op of each scheduler.
 * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
  *            timeout interval is over.
  * @pending_list: the list of jobs which are currently in the job queue.
@@ -512,6 +513,7 @@ struct drm_gpu_scheduler {
 	struct workqueue_struct		*submit_wq;
 	struct workqueue_struct		*timeout_wq;
 	struct work_struct		work_run_job;
+	struct work_struct		work_free_job;
 	struct delayed_work		work_tdr;
 	struct list_head		pending_list;
 	spinlock_t			job_list_lock;
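
A note on the pattern itself, outside the patch: giving each logical step its own work_struct on the scheduler's shared submit_wq is a standard workqueue idiom. The sketch below illustrates only that idiom and is not the drm_sched implementation; the module, the demo_sched structure, and the demo_* names are hypothetical, and the ordered workqueue is merely a convenient way to keep the two steps serialized in the demo, not a statement about how drm_sched_init sets up its submit_wq.

/*
 * Hypothetical sketch: two work items ("run" and "free") sharing one
 * ordered workqueue, so each step can be queued independently while the
 * queue serializes their execution.
 */
#include <linux/module.h>
#include <linux/workqueue.h>

struct demo_sched {
	struct workqueue_struct *submit_wq;
	struct work_struct work_run;
	struct work_struct work_free;
};

static struct demo_sched demo;

static void demo_run_work(struct work_struct *w)
{
	struct demo_sched *s = container_of(w, struct demo_sched, work_run);

	pr_info("demo: run step\n");
	/* The "run" step produced something to reclaim; kick the "free" step. */
	queue_work(s->submit_wq, &s->work_free);
}

static void demo_free_work(struct work_struct *w)
{
	pr_info("demo: free step\n");
}

static int __init demo_init(void)
{
	/* Ordered workqueue: one work item executes at a time, in queue order. */
	demo.submit_wq = alloc_ordered_workqueue("demo_submit", 0);
	if (!demo.submit_wq)
		return -ENOMEM;

	INIT_WORK(&demo.work_run, demo_run_work);
	INIT_WORK(&demo.work_free, demo_free_work);
	queue_work(demo.submit_wq, &demo.work_run);
	return 0;
}

static void __exit demo_exit(void)
{
	cancel_work_sync(&demo.work_run);
	cancel_work_sync(&demo.work_free);
	destroy_workqueue(demo.submit_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");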