diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 03e6bd8a1a42..f8f0e1c19002 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -293,8 +293,10 @@ static void drm_sched_job_timedout(struct work_struct *work) * Guilty job did complete and hence needs to be manually removed * See drm_sched_stop doc. */ - if (list_empty(&job->node)) + if (sched->free_guilty) { job->sched->ops->free_job(job); + sched->free_guilty = false; + } spin_lock_irqsave(&sched->job_list_lock, flags); drm_sched_start_timeout(sched); @@ -395,10 +397,13 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad) /* * We must keep bad job alive for later use during - * recovery by some of the drivers + * recovery by some of the drivers but leave a hint + * that the guilty job must be released. */ if (bad != s_job) sched->ops->free_job(s_job); + else + sched->free_guilty = true; } } diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 9ee0f2735d71..57b4121c750a 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -259,6 +259,7 @@ struct drm_sched_backend_ops { * guilty and it will be considered for scheduling further. * @num_jobs: the number of jobs in queue in the scheduler * @ready: marks if the underlying HW is ready to work + * @free_guilty: A hint to the timeout handler to free the guilty job. * * One scheduler is implemented for each hardware ring. */ @@ -279,6 +280,7 @@ struct drm_gpu_scheduler { int hang_limit; atomic_t num_jobs; bool ready; + bool free_guilty; }; int drm_sched_init(struct drm_gpu_scheduler *sched,