From f17182d03680044649d6e124a4580a33c65990e2 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Wed, 31 May 2023 12:40:41 +0300 Subject: [PATCH] accel/habanalabs: stop fetching MME SBTE error cause Because in this case we have only a single possible cause, we can safely stop fetching the cause from firmware. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/gaudi2/gaudi2.c | 31 ++++++------------------ 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index ed3b0b6225d2..899b1c4b53f6 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -66,7 +66,6 @@ #define GAUDI2_NUM_OF_TPC_INTR_CAUSE 31 #define GAUDI2_NUM_OF_DEC_ERR_CAUSE 25 #define GAUDI2_NUM_OF_MME_ERR_CAUSE 16 -#define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE 5 #define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE 7 #define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE 8 #define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE 19 @@ -916,14 +915,6 @@ static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = "sbte_prtn_intr_4", }; -static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = { - "i0", - "i1", - "i2", - "i3", - "i4", -}; - static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = { "WBC ERR RESP_0", "WBC ERR RESP_1", @@ -8781,21 +8772,16 @@ static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event return error_count; } -static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type, - u64 intr_cause_data) +static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type) { - int i, error_count = 0; - - for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++) - if (intr_cause_data & BIT(i)) { - gaudi2_print_event(hdev, event_type, true, - "err cause: %s", guadi2_mme_sbte_error_cause[i]); - error_count++; - } - + /* + * We have a single error cause here but the report mechanism is + * buggy. Hence there is no good reason to fetch the cause so we + * just check for glbl_errors and exit. + */ hl_check_for_glbl_errors(hdev); - return error_count; + return GAUDI2_NA_EVENT_CAUSE; } static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type, @@ -9856,8 +9842,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP: case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP: case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP: - error_count = gaudi2_handle_mme_sbte_err(hdev, event_type, - le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); + error_count = gaudi2_handle_mme_sbte_err(hdev, event_type); event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B: