accel/habanalabs: stop fetching MME SBTE error cause

Because in this case we have only a single possible cause, we can
safely stop fetching the cause from firmware.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
Ofir Bitton 2023-05-31 12:40:41 +03:00 committed by Oded Gabbay
parent e4a97d6b62
commit f17182d036

View file

@ -66,7 +66,6 @@
#define GAUDI2_NUM_OF_TPC_INTR_CAUSE 31
#define GAUDI2_NUM_OF_DEC_ERR_CAUSE 25
#define GAUDI2_NUM_OF_MME_ERR_CAUSE 16
#define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE 5
#define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE 7
#define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE 8
#define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE 19
@ -916,14 +915,6 @@ static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] =
"sbte_prtn_intr_4",
};
static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
"i0",
"i1",
"i2",
"i3",
"i4",
};
static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
"WBC ERR RESP_0",
"WBC ERR RESP_1",
@ -8781,21 +8772,16 @@ static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event
return error_count;
}
static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type,
u64 intr_cause_data)
static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type)
{
int i, error_count = 0;
for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
if (intr_cause_data & BIT(i)) {
gaudi2_print_event(hdev, event_type, true,
"err cause: %s", guadi2_mme_sbte_error_cause[i]);
error_count++;
}
/*
* We have a single error cause here but the report mechanism is
* buggy. Hence there is no good reason to fetch the cause so we
* just check for glbl_errors and exit.
*/
hl_check_for_glbl_errors(hdev);
return error_count;
return GAUDI2_NA_EVENT_CAUSE;
}
static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
@ -9856,8 +9842,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
error_count = gaudi2_handle_mme_sbte_err(hdev, event_type,
le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
error_count = gaudi2_handle_mme_sbte_err(hdev, event_type);
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
break;
case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B: