drm/amdkfd: add sdma poison consumption handling

Follow the same apporach as GFX to handle SDMA
poison consumption. Send SIGBUS to application
when receives SDMA_ECC interrupt and issue gpu
reset either mode 2 or mode 1 to get the engine
back

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li<dennis.li@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Hawking Zhang 2021-06-03 16:10:04 +08:00 committed by Alex Deucher
parent 0dc2bafb08
commit 4a1d4b6d38
2 changed files with 7 additions and 1 deletions

View file

@ -249,8 +249,13 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
client_id == SOC15_IH_CLIENTID_SDMA5 ||
client_id == SOC15_IH_CLIENTID_SDMA6 ||
client_id == SOC15_IH_CLIENTID_SDMA7) {
if (source_id == SOC15_INTSRC_SDMA_TRAP)
if (source_id == SOC15_INTSRC_SDMA_TRAP) {
kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28);
} else if (source_id == SOC15_INTSRC_SDMA_ECC) {
kfd_signal_poison_consumed_event(dev, pasid);
amdgpu_amdkfd_gpu_reset(dev->kgd);
return;
}
} else if (client_id == SOC15_IH_CLIENTID_VMC ||
client_id == SOC15_IH_CLIENTID_VMC1 ||
client_id == SOC15_IH_CLIENTID_UTCL2) {

View file

@ -30,6 +30,7 @@
#define SOC15_INTSRC_SQ_INTERRUPT_MSG 239
#define SOC15_INTSRC_VMC_FAULT 0
#define SOC15_INTSRC_SDMA_TRAP 224
#define SOC15_INTSRC_SDMA_ECC 220
#define SOC15_CLIENT_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) & 0xff)