drm/amdkfd: Fix eviction fence handling

Handle case that dma_fence_get_rcu_safe returns NULL.

If restore work is already scheduled, only update its timer. The same
work item cannot be queued twice, so undo the extra queue eviction.

Fixes: 9a1c1339ab ("drm/amdkfd: Run restore_workers on freezable WQs")
Signed-off-by: Felix Kuehling <felix.kuehling@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Tested-by: Gang BA <Gang.Ba@amd.com>
Reviewed-by: Gang BA <Gang.Ba@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
This commit is contained in:
Felix Kuehling 2024-04-17 21:13:59 -04:00 committed by Alex Deucher
parent 2eb9dd497a
commit 37865e02e6
1 changed files with 5 additions and 4 deletions

View File

@ -1922,6 +1922,8 @@ static int signal_eviction_fence(struct kfd_process *p)
rcu_read_lock();
ef = dma_fence_get_rcu_safe(&p->ef);
rcu_read_unlock();
if (!ef)
return -EINVAL;
ret = dma_fence_signal(ef);
dma_fence_put(ef);
@ -1949,10 +1951,9 @@ static void evict_process_worker(struct work_struct *work)
* they are responsible stopping the queues and scheduling
* the restore work.
*/
if (!signal_eviction_fence(p))
queue_delayed_work(kfd_restore_wq, &p->restore_work,
msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
else
if (signal_eviction_fence(p) ||
mod_delayed_work(kfd_restore_wq, &p->restore_work,
msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)))
kfd_process_restore_queues(p);
pr_debug("Finished evicting pasid 0x%x\n", p->pasid);