drm/sched: implement dynamic job-flow control

Currently, job flow control is implemented simply by limiting the number
of jobs in flight. Therefore, a scheduler is initialized with a credit
limit that corresponds to the number of jobs which can be sent to the
hardware.

This implies that for each job, drivers need to account for the maximum
job size possible in order to not overflow the ring buffer.

However, there are drivers, such as Nouveau, where the job size has a
rather large range. For such drivers it can easily happen that job
submissions not even filling the ring by 1% can block subsequent
submissions, which, in the worst case, can lead to the ring run dry.

In order to overcome this issue, allow for tracking the actual job size
instead of the number of jobs. Therefore, add a field to track a job's
credit count, which represents the number of credits a job contributes
to the scheduler's credit limit.

Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Reviewed-by: Luben Tuikov <ltuikov89@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231110001638.71750-1-dakr@redhat.com
This commit is contained in:
Danilo Krummrich 2023-11-10 01:16:33 +01:00
parent 36245bd02e
commit a78422e9df
14 changed files with 175 additions and 51 deletions

View File

@ -552,6 +552,12 @@ Overview
.. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c
:doc: Overview
Flow Control
------------
.. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c
:doc: Flow Control
Scheduler Function References
-----------------------------

View File

@ -115,7 +115,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (!entity)
return 0;
return drm_sched_job_init(&(*job)->base, entity, owner);
return drm_sched_job_init(&(*job)->base, entity, 1, owner);
}
int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,

View File

@ -535,7 +535,7 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
ret = drm_sched_job_init(&submit->sched_job,
&ctx->sched_entity[args->pipe],
submit->ctx);
1, submit->ctx);
if (ret)
goto err_submit_put;

View File

@ -1917,7 +1917,7 @@ static int etnaviv_gpu_rpm_suspend(struct device *dev)
u32 idle, mask;
/* If there are any jobs in the HW queue, we're not idle */
if (atomic_read(&gpu->sched.hw_rq_count))
if (atomic_read(&gpu->sched.credit_count))
return -EBUSY;
/* Check whether the hardware (except FE and MC) is idle */

View File

@ -514,7 +514,7 @@ int lima_device_suspend(struct device *dev)
/* check any task running */
for (i = 0; i < lima_pipe_num; i++) {
if (atomic_read(&ldev->pipe[i].base.hw_rq_count))
if (atomic_read(&ldev->pipe[i].base.credit_count))
return -EBUSY;
}

View File

@ -123,7 +123,7 @@ int lima_sched_task_init(struct lima_sched_task *task,
for (i = 0; i < num_bos; i++)
drm_gem_object_get(&bos[i]->base.base);
err = drm_sched_job_init(&task->base, &context->base, vm);
err = drm_sched_job_init(&task->base, &context->base, 1, vm);
if (err) {
kfree(task->bos);
return err;

View File

@ -48,7 +48,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
return ERR_PTR(ret);
}
ret = drm_sched_job_init(&submit->base, queue->entity, queue);
ret = drm_sched_job_init(&submit->base, queue->entity, 1, queue);
if (ret) {
kfree(submit->hw_fence);
kfree(submit);

View File

@ -89,7 +89,7 @@ nouveau_job_init(struct nouveau_job *job,
}
ret = drm_sched_job_init(&job->base, &entity->base, NULL);
ret = drm_sched_job_init(&job->base, &entity->base, 1, NULL);
if (ret)
goto err_free_chains;

View File

@ -274,7 +274,7 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
ret = drm_sched_job_init(&job->base,
&file_priv->sched_entity[slot],
NULL);
1, NULL);
if (ret)
goto out_put_job;

View File

@ -963,7 +963,7 @@ int panfrost_job_is_idle(struct panfrost_device *pfdev)
for (i = 0; i < NUM_JOB_SLOTS; i++) {
/* If there are any jobs in the HW queue, we're not idle */
if (atomic_read(&js->queue[i].sched.hw_rq_count))
if (atomic_read(&js->queue[i].sched.credit_count))
return false;
}

View File

@ -51,7 +51,7 @@ DECLARE_EVENT_CLASS(drm_sched_job,
__assign_str(name, sched_job->sched->name);
__entry->job_count = spsc_queue_count(&entity->job_queue);
__entry->hw_job_count = atomic_read(
&sched_job->sched->hw_rq_count);
&sched_job->sched->credit_count);
),
TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d",
__entry->entity, __entry->id,

View File

@ -48,6 +48,30 @@
* through the jobs entity pointer.
*/
/**
* DOC: Flow Control
*
* The DRM GPU scheduler provides a flow control mechanism to regulate the rate
* in which the jobs fetched from scheduler entities are executed.
*
* In this context the &drm_gpu_scheduler keeps track of a driver specified
* credit limit representing the capacity of this scheduler and a credit count;
* every &drm_sched_job carries a driver specified number of credits.
*
* Once a job is executed (but not yet finished), the job's credits contribute
* to the scheduler's credit count until the job is finished. If by executing
* one more job the scheduler's credit count would exceed the scheduler's
* credit limit, the job won't be executed. Instead, the scheduler will wait
* until the credit count has decreased enough to not overflow its credit limit.
* This implies waiting for previously executed jobs.
*
* Optionally, drivers may register a callback (update_job_credits) provided by
* struct drm_sched_backend_ops to update the job's credits dynamically. The
* scheduler executes this callback every time the scheduler considers a job for
* execution and subsequently checks whether the job fits the scheduler's credit
* limit.
*/
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/completion.h>
@ -75,6 +99,51 @@ int drm_sched_policy = DRM_SCHED_POLICY_FIFO;
MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default).");
module_param_named(sched_policy, drm_sched_policy, int, 0444);
static u32 drm_sched_available_credits(struct drm_gpu_scheduler *sched)
{
u32 credits;
drm_WARN_ON(sched, check_sub_overflow(sched->credit_limit,
atomic_read(&sched->credit_count),
&credits));
return credits;
}
/**
* drm_sched_can_queue -- Can we queue more to the hardware?
* @sched: scheduler instance
* @entity: the scheduler entity
*
* Return true if we can push at least one more job from @entity, false
* otherwise.
*/
static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched,
struct drm_sched_entity *entity)
{
struct drm_sched_job *s_job;
s_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
if (!s_job)
return false;
if (sched->ops->update_job_credits) {
s_job->credits = sched->ops->update_job_credits(s_job);
drm_WARN(sched, !s_job->credits,
"Jobs with zero credits bypass job-flow control.\n");
}
/* If a job exceeds the credit limit, truncate it to the credit limit
* itself to guarantee forward progress.
*/
if (drm_WARN(sched, s_job->credits > sched->credit_limit,
"Jobs may not exceed the credit limit, truncate.\n"))
s_job->credits = sched->credit_limit;
return drm_sched_available_credits(sched) >= s_job->credits;
}
static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a,
const struct rb_node *b)
{
@ -186,12 +255,18 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
/**
* drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run
*
* @sched: the gpu scheduler
* @rq: scheduler run queue to check.
*
* Try to find a ready entity, returns NULL if none found.
* Try to find the next ready entity.
*
* Return an entity if one is found; return an error-pointer (!NULL) if an
* entity was ready, but the scheduler had insufficient credits to accommodate
* its job; return NULL, if no ready entity was found.
*/
static struct drm_sched_entity *
drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched,
struct drm_sched_rq *rq)
{
struct drm_sched_entity *entity;
@ -201,6 +276,14 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
if (entity) {
list_for_each_entry_continue(entity, &rq->entities, list) {
if (drm_sched_entity_is_ready(entity)) {
/* If we can't queue yet, preserve the current
* entity in terms of fairness.
*/
if (!drm_sched_can_queue(sched, entity)) {
spin_unlock(&rq->lock);
return ERR_PTR(-ENOSPC);
}
rq->current_entity = entity;
reinit_completion(&entity->entity_idle);
spin_unlock(&rq->lock);
@ -210,8 +293,15 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
}
list_for_each_entry(entity, &rq->entities, list) {
if (drm_sched_entity_is_ready(entity)) {
/* If we can't queue yet, preserve the current entity in
* terms of fairness.
*/
if (!drm_sched_can_queue(sched, entity)) {
spin_unlock(&rq->lock);
return ERR_PTR(-ENOSPC);
}
rq->current_entity = entity;
reinit_completion(&entity->entity_idle);
spin_unlock(&rq->lock);
@ -230,12 +320,18 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
/**
* drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run
*
* @sched: the gpu scheduler
* @rq: scheduler run queue to check.
*
* Find oldest waiting ready entity, returns NULL if none found.
* Find oldest waiting ready entity.
*
* Return an entity if one is found; return an error-pointer (!NULL) if an
* entity was ready, but the scheduler had insufficient credits to accommodate
* its job; return NULL, if no ready entity was found.
*/
static struct drm_sched_entity *
drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
drm_sched_rq_select_entity_fifo(struct drm_gpu_scheduler *sched,
struct drm_sched_rq *rq)
{
struct rb_node *rb;
@ -245,6 +341,14 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node);
if (drm_sched_entity_is_ready(entity)) {
/* If we can't queue yet, preserve the current entity in
* terms of fairness.
*/
if (!drm_sched_can_queue(sched, entity)) {
spin_unlock(&rq->lock);
return ERR_PTR(-ENOSPC);
}
rq->current_entity = entity;
reinit_completion(&entity->entity_idle);
break;
@ -302,7 +406,7 @@ static void drm_sched_job_done(struct drm_sched_job *s_job, int result)
struct drm_sched_fence *s_fence = s_job->s_fence;
struct drm_gpu_scheduler *sched = s_fence->sched;
atomic_dec(&sched->hw_rq_count);
atomic_sub(s_job->credits, &sched->credit_count);
atomic_dec(sched->score);
trace_drm_sched_process_job(s_fence);
@ -525,7 +629,7 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
&s_job->cb)) {
dma_fence_put(s_job->s_fence->parent);
s_job->s_fence->parent = NULL;
atomic_dec(&sched->hw_rq_count);
atomic_sub(s_job->credits, &sched->credit_count);
} else {
/*
* remove job from pending_list.
@ -586,7 +690,7 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
struct dma_fence *fence = s_job->s_fence->parent;
atomic_inc(&sched->hw_rq_count);
atomic_add(s_job->credits, &sched->credit_count);
if (!full_recovery)
continue;
@ -667,6 +771,8 @@ EXPORT_SYMBOL(drm_sched_resubmit_jobs);
* drm_sched_job_init - init a scheduler job
* @job: scheduler job to init
* @entity: scheduler entity to use
* @credits: the number of credits this job contributes to the schedulers
* credit limit
* @owner: job owner for debugging
*
* Refer to drm_sched_entity_push_job() documentation
@ -684,7 +790,7 @@ EXPORT_SYMBOL(drm_sched_resubmit_jobs);
*/
int drm_sched_job_init(struct drm_sched_job *job,
struct drm_sched_entity *entity,
void *owner)
u32 credits, void *owner)
{
if (!entity->rq) {
/* This will most likely be followed by missing frames
@ -695,7 +801,13 @@ int drm_sched_job_init(struct drm_sched_job *job,
return -ENOENT;
}
if (unlikely(!credits)) {
pr_err("*ERROR* %s: credits cannot be 0!\n", __func__);
return -EINVAL;
}
job->entity = entity;
job->credits = credits;
job->s_fence = drm_sched_fence_alloc(entity, owner);
if (!job->s_fence)
return -ENOMEM;
@ -907,21 +1019,10 @@ void drm_sched_job_cleanup(struct drm_sched_job *job)
}
EXPORT_SYMBOL(drm_sched_job_cleanup);
/**
* drm_sched_can_queue -- Can we queue more to the hardware?
* @sched: scheduler instance
*
* Return true if we can push more jobs to the hw, otherwise false.
*/
static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched)
{
return atomic_read(&sched->hw_rq_count) <
sched->hw_submission_limit;
}
/**
* drm_sched_wakeup - Wake up the scheduler if it is ready to queue
* @sched: scheduler instance
* @entity: the scheduler entity
*
* Wake up the scheduler if we can queue jobs.
*/
@ -929,7 +1030,7 @@ void drm_sched_wakeup(struct drm_gpu_scheduler *sched,
struct drm_sched_entity *entity)
{
if (drm_sched_entity_is_ready(entity))
if (drm_sched_can_queue(sched))
if (drm_sched_can_queue(sched, entity))
drm_sched_run_job_queue(sched);
}
@ -938,7 +1039,11 @@ void drm_sched_wakeup(struct drm_gpu_scheduler *sched,
*
* @sched: scheduler instance
*
* Returns the entity to process or NULL if none are found.
* Return an entity to process or NULL if none are found.
*
* Note, that we break out of the for-loop when "entity" is non-null, which can
* also be an error-pointer--this assures we don't process lower priority
* run-queues. See comments in the respectively called functions.
*/
static struct drm_sched_entity *
drm_sched_select_entity(struct drm_gpu_scheduler *sched)
@ -946,19 +1051,16 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched)
struct drm_sched_entity *entity;
int i;
if (!drm_sched_can_queue(sched))
return NULL;
/* Kernel run queue has higher priority than normal run queue*/
for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ?
drm_sched_rq_select_entity_fifo(sched->sched_rq[i]) :
drm_sched_rq_select_entity_rr(sched->sched_rq[i]);
drm_sched_rq_select_entity_fifo(sched, sched->sched_rq[i]) :
drm_sched_rq_select_entity_rr(sched, sched->sched_rq[i]);
if (entity)
break;
}
return entity;
return IS_ERR(entity) ? NULL : entity;
}
/**
@ -1094,7 +1196,7 @@ static void drm_sched_run_job_work(struct work_struct *w)
s_fence = sched_job->s_fence;
atomic_inc(&sched->hw_rq_count);
atomic_add(sched_job->credits, &sched->credit_count);
drm_sched_job_begin(sched_job);
trace_drm_run_job(sched_job, entity);
@ -1129,7 +1231,7 @@ static void drm_sched_run_job_work(struct work_struct *w)
* @submit_wq: workqueue to use for submission. If NULL, an ordered wq is
* allocated and used
* @num_rqs: number of runqueues, one for each priority, up to DRM_SCHED_PRIORITY_COUNT
* @hw_submission: number of hw submissions that can be in flight
* @credit_limit: the number of credits this scheduler can hold from all jobs
* @hang_limit: number of times to allow a job to hang before dropping it
* @timeout: timeout value in jiffies for the scheduler
* @timeout_wq: workqueue to use for timeout work. If NULL, the system_wq is
@ -1143,14 +1245,14 @@ static void drm_sched_run_job_work(struct work_struct *w)
int drm_sched_init(struct drm_gpu_scheduler *sched,
const struct drm_sched_backend_ops *ops,
struct workqueue_struct *submit_wq,
u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit,
u32 num_rqs, u32 credit_limit, unsigned int hang_limit,
long timeout, struct workqueue_struct *timeout_wq,
atomic_t *score, const char *name, struct device *dev)
{
int i, ret;
sched->ops = ops;
sched->hw_submission_limit = hw_submission;
sched->credit_limit = credit_limit;
sched->name = name;
sched->timeout = timeout;
sched->timeout_wq = timeout_wq ? : system_wq;
@ -1199,7 +1301,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
init_waitqueue_head(&sched->job_scheduled);
INIT_LIST_HEAD(&sched->pending_list);
spin_lock_init(&sched->job_list_lock);
atomic_set(&sched->hw_rq_count, 0);
atomic_set(&sched->credit_count, 0);
INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
INIT_WORK(&sched->work_run_job, drm_sched_run_job_work);
INIT_WORK(&sched->work_free_job, drm_sched_free_job_work);

View File

@ -418,7 +418,7 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
job->file = file_priv;
ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
v3d_priv);
1, v3d_priv);
if (ret)
goto fail;

View File

@ -321,6 +321,7 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
* @sched: the scheduler instance on which this job is scheduled.
* @s_fence: contains the fences for the scheduling of job.
* @finish_cb: the callback for the finished fence.
* @credits: the number of credits this job contributes to the scheduler
* @work: Helper to reschdeule job kill to different context.
* @id: a unique id assigned to each job scheduled on the scheduler.
* @karma: increment on every hang caused by this job. If this exceeds the hang
@ -340,6 +341,8 @@ struct drm_sched_job {
struct drm_gpu_scheduler *sched;
struct drm_sched_fence *s_fence;
u32 credits;
/*
* work is used only after finish_cb has been used and will not be
* accessed anymore.
@ -463,13 +466,27 @@ struct drm_sched_backend_ops {
* and it's time to clean it up.
*/
void (*free_job)(struct drm_sched_job *sched_job);
/**
* @update_job_credits: Called when the scheduler is considering this
* job for execution.
*
* This callback returns the number of credits the job would take if
* pushed to the hardware. Drivers may use this to dynamically update
* the job's credit count. For instance, deduct the number of credits
* for already signalled native fences.
*
* This callback is optional.
*/
u32 (*update_job_credits)(struct drm_sched_job *sched_job);
};
/**
* struct drm_gpu_scheduler - scheduler instance-specific data
*
* @ops: backend operations provided by the driver.
* @hw_submission_limit: the max size of the hardware queue.
* @credit_limit: the credit limit of this scheduler
* @credit_count: the current credit count of this scheduler
* @timeout: the time after which a job is removed from the scheduler.
* @name: name of the ring for which this scheduler is being used.
* @num_rqs: Number of run-queues. This is at most DRM_SCHED_PRIORITY_COUNT,
@ -478,7 +495,6 @@ struct drm_sched_backend_ops {
* @job_scheduled: once @drm_sched_entity_do_release is called the scheduler
* waits on this wait queue until all the scheduled jobs are
* finished.
* @hw_rq_count: the number of jobs currently in the hardware queue.
* @job_id_count: used to assign unique id to the each job.
* @submit_wq: workqueue used to queue @work_run_job and @work_free_job
* @timeout_wq: workqueue used to queue @work_tdr
@ -502,13 +518,13 @@ struct drm_sched_backend_ops {
*/
struct drm_gpu_scheduler {
const struct drm_sched_backend_ops *ops;
uint32_t hw_submission_limit;
u32 credit_limit;
atomic_t credit_count;
long timeout;
const char *name;
u32 num_rqs;
struct drm_sched_rq **sched_rq;
wait_queue_head_t job_scheduled;
atomic_t hw_rq_count;
atomic64_t job_id_count;
struct workqueue_struct *submit_wq;
struct workqueue_struct *timeout_wq;
@ -530,14 +546,14 @@ struct drm_gpu_scheduler {
int drm_sched_init(struct drm_gpu_scheduler *sched,
const struct drm_sched_backend_ops *ops,
struct workqueue_struct *submit_wq,
u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit,
u32 num_rqs, u32 credit_limit, unsigned int hang_limit,
long timeout, struct workqueue_struct *timeout_wq,
atomic_t *score, const char *name, struct device *dev);
void drm_sched_fini(struct drm_gpu_scheduler *sched);
int drm_sched_job_init(struct drm_sched_job *job,
struct drm_sched_entity *entity,
void *owner);
u32 credits, void *owner);
void drm_sched_job_arm(struct drm_sched_job *job);
int drm_sched_job_add_dependency(struct drm_sched_job *job,
struct dma_fence *fence);