mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-09-29 22:02:02 +00:00
drm/amdgpu: Add work_struct for GPU reset from kfd.
We need to have a work_struct to cancel this reset if another already in progress. Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
2f83658ffc
commit
b5fd0cf3ea
3 changed files with 15 additions and 32 deletions
|
@ -33,6 +33,7 @@
|
|||
#include <uapi/linux/kfd_ioctl.h>
|
||||
#include "amdgpu_ras.h"
|
||||
#include "amdgpu_umc.h"
|
||||
#include "amdgpu_reset.h"
|
||||
|
||||
/* Total memory size in system memory and all GPU VRAM. Used to
|
||||
* estimate worst case amount of memory to reserve for page tables
|
||||
|
@ -122,6 +123,15 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
static void amdgpu_amdkfd_reset_work(struct work_struct *work)
|
||||
{
|
||||
struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
|
||||
kfd.reset_work);
|
||||
|
||||
amdgpu_device_gpu_recover_imp(adev, NULL);
|
||||
}
|
||||
|
||||
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
|
||||
{
|
||||
int i;
|
||||
|
@ -180,6 +190,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
|
|||
|
||||
adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
|
||||
adev_to_drm(adev), &gpu_resources);
|
||||
|
||||
INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -247,7 +259,8 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
|
|||
void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
|
||||
{
|
||||
if (amdgpu_device_should_recover_gpu(adev))
|
||||
amdgpu_device_gpu_recover(adev, NULL);
|
||||
amdgpu_reset_domain_schedule(adev->reset_domain,
|
||||
&adev->kfd.reset_work);
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
|
||||
|
|
|
@ -97,6 +97,7 @@ struct amdgpu_kfd_dev {
|
|||
struct kfd_dev *dev;
|
||||
uint64_t vram_used;
|
||||
bool init_complete;
|
||||
struct work_struct reset_work;
|
||||
};
|
||||
|
||||
enum kgd_engine_type {
|
||||
|
|
|
@ -5323,37 +5323,6 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
|
|||
return r;
|
||||
}
|
||||
|
||||
struct amdgpu_recover_work_struct {
|
||||
struct work_struct base;
|
||||
struct amdgpu_device *adev;
|
||||
struct amdgpu_job *job;
|
||||
int ret;
|
||||
};
|
||||
|
||||
static void amdgpu_device_queue_gpu_recover_work(struct work_struct *work)
|
||||
{
|
||||
struct amdgpu_recover_work_struct *recover_work = container_of(work, struct amdgpu_recover_work_struct, base);
|
||||
|
||||
amdgpu_device_gpu_recover_imp(recover_work->adev, recover_work->job);
|
||||
}
|
||||
/*
|
||||
* Serialize gpu recover into reset domain single threaded wq
|
||||
*/
|
||||
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
struct amdgpu_job *job)
|
||||
{
|
||||
struct amdgpu_recover_work_struct work = {.adev = adev, .job = job};
|
||||
|
||||
INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work);
|
||||
|
||||
if (!amdgpu_reset_domain_schedule(adev->reset_domain, &work.base))
|
||||
return -EAGAIN;
|
||||
|
||||
flush_work(&work.base);
|
||||
|
||||
return atomic_read(&adev->reset_domain->reset_res);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
|
||||
*
|
||||
|
|
Loading…
Reference in a new issue