mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-10-11 11:09:13 +00:00
drm/amdkfd: svm range restore work deadlock when process exit
[ Upstream commit 6225bb3a88 ]
kfd_process_notifier_release flushes svm_range_restore_work,
which calls svm_range_list_lock_and_flush_work to flush the deferred_list
work. But if the mmput in the deferred_list work releases the last user, it
calls exit_mmap -> notifier_release, which deadlocks with the backtrace below.
Move the flush of svm_range_restore_work to kfd_process_wq_release to avoid
the deadlock. svm_range_restore_work then takes a task->mm reference so that
the mm cannot go away while validating and mapping ranges to the GPU.
Workqueue: events svm_range_deferred_list_work [amdgpu]
Call Trace:
wait_for_completion+0x94/0x100
__flush_work+0x12a/0x1e0
__cancel_work_timer+0x10e/0x190
cancel_delayed_work_sync+0x13/0x20
kfd_process_notifier_release+0x98/0x2a0 [amdgpu]
__mmu_notifier_release+0x74/0x1f0
exit_mmap+0x170/0x200
mmput+0x5d/0x130
svm_range_deferred_list_work+0x104/0x230 [amdgpu]
process_one_work+0x220/0x3c0
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reported-by: Ruili Ji <ruili.ji@amd.com>
Tested-by: Ruili Ji <ruili.ji@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
parent
699d03880a
commit
858822905f
2 changed files with 9 additions and 7 deletions
|
@ -1150,7 +1150,6 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
|
||||||
|
|
||||||
cancel_delayed_work_sync(&p->eviction_work);
|
cancel_delayed_work_sync(&p->eviction_work);
|
||||||
cancel_delayed_work_sync(&p->restore_work);
|
cancel_delayed_work_sync(&p->restore_work);
|
||||||
cancel_delayed_work_sync(&p->svms.restore_work);
|
|
||||||
|
|
||||||
mutex_lock(&p->mutex);
|
mutex_lock(&p->mutex);
|
||||||
|
|
||||||
|
|
|
@ -1643,13 +1643,14 @@ static void svm_range_restore_work(struct work_struct *work)
|
||||||
|
|
||||||
pr_debug("restore svm ranges\n");
|
pr_debug("restore svm ranges\n");
|
||||||
|
|
||||||
/* kfd_process_notifier_release destroys this worker thread. So during
|
|
||||||
* the lifetime of this thread, kfd_process and mm will be valid.
|
|
||||||
*/
|
|
||||||
p = container_of(svms, struct kfd_process, svms);
|
p = container_of(svms, struct kfd_process, svms);
|
||||||
mm = p->mm;
|
|
||||||
if (!mm)
|
/* Keep mm reference when svm_range_validate_and_map ranges */
|
||||||
|
mm = get_task_mm(p->lead_thread);
|
||||||
|
if (!mm) {
|
||||||
|
pr_debug("svms 0x%p process mm gone\n", svms);
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
svm_range_list_lock_and_flush_work(svms, mm);
|
svm_range_list_lock_and_flush_work(svms, mm);
|
||||||
mutex_lock(&svms->lock);
|
mutex_lock(&svms->lock);
|
||||||
|
@ -1703,6 +1704,7 @@ static void svm_range_restore_work(struct work_struct *work)
|
||||||
out_reschedule:
|
out_reschedule:
|
||||||
mutex_unlock(&svms->lock);
|
mutex_unlock(&svms->lock);
|
||||||
mmap_write_unlock(mm);
|
mmap_write_unlock(mm);
|
||||||
|
mmput(mm);
|
||||||
|
|
||||||
/* If validation failed, reschedule another attempt */
|
/* If validation failed, reschedule another attempt */
|
||||||
if (evicted_ranges) {
|
if (evicted_ranges) {
|
||||||
|
@ -2840,6 +2842,8 @@ void svm_range_list_fini(struct kfd_process *p)
|
||||||
|
|
||||||
pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, &p->svms);
|
pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, &p->svms);
|
||||||
|
|
||||||
|
cancel_delayed_work_sync(&p->svms.restore_work);
|
||||||
|
|
||||||
/* Ensure list work is finished before process is destroyed */
|
/* Ensure list work is finished before process is destroyed */
|
||||||
flush_work(&p->svms.deferred_list_work);
|
flush_work(&p->svms.deferred_list_work);
|
||||||
|
|
||||||
|
@ -2850,7 +2854,6 @@ void svm_range_list_fini(struct kfd_process *p)
|
||||||
atomic_inc(&p->svms.drain_pagefaults);
|
atomic_inc(&p->svms.drain_pagefaults);
|
||||||
svm_range_drain_retry_fault(&p->svms);
|
svm_range_drain_retry_fault(&p->svms);
|
||||||
|
|
||||||
|
|
||||||
list_for_each_entry_safe(prange, next, &p->svms.list, list) {
|
list_for_each_entry_safe(prange, next, &p->svms.list, list) {
|
||||||
svm_range_unlink(prange);
|
svm_range_unlink(prange);
|
||||||
svm_range_remove_notifier(prange);
|
svm_range_remove_notifier(prange);
|
||||||
|
|
Loading…
Reference in a new issue