net: qed: adding hw_err states and handling

Here we introduce qed device error tracking flags and error types.

qed_hw_err_notify is an entrace point to report errors.
It'll notify higher level drivers (qede/qedr/etc) to handle and recover
the error.

List of posible errors comes from hardware interfaces, but could be
extended in future.

Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Igor Russkikh 2020-05-14 12:57:17 +03:00 committed by David S. Miller
parent c8a867a38f
commit d639836ab3
5 changed files with 90 additions and 0 deletions

View File

@ -1020,6 +1020,8 @@ u32 qed_unzip_data(struct qed_hwfn *p_hwfn,
u32 input_len, u8 *input_buf,
u32 max_size, u8 *unzip_buf);
void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn);
void qed_hw_error_occurred(struct qed_hwfn *p_hwfn,
enum qed_hw_err_type err_type);
void qed_get_protocol_stats(struct qed_dev *cdev,
enum qed_mcp_protocol_type type,
union qed_mcp_protocol_stats *stats);

View File

@ -837,6 +837,38 @@ int qed_dmae_host2host(struct qed_hwfn *p_hwfn,
return rc;
}
void qed_hw_err_notify(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
enum qed_hw_err_type err_type, char *fmt, ...)
{
char buf[QED_HW_ERR_MAX_STR_SIZE];
va_list vl;
int len;
if (fmt) {
va_start(vl, fmt);
len = vsnprintf(buf, QED_HW_ERR_MAX_STR_SIZE, fmt, vl);
va_end(vl);
if (len > QED_HW_ERR_MAX_STR_SIZE - 1)
len = QED_HW_ERR_MAX_STR_SIZE - 1;
DP_NOTICE(p_hwfn, "%s", buf);
}
/* Fan failure cannot be masked by handling of another HW error */
if (p_hwfn->cdev->recov_in_prog &&
err_type != QED_HW_ERR_FAN_FAIL) {
DP_VERBOSE(p_hwfn,
NETIF_MSG_DRV,
"Recovery is in progress. Avoid notifying about HW error %d.\n",
err_type);
return;
}
qed_hw_error_occurred(p_hwfn, err_type);
}
int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, const char *phase)
{

View File

@ -315,4 +315,19 @@ int qed_init_fw_data(struct qed_dev *cdev,
int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, const char *phase);
#define QED_HW_ERR_MAX_STR_SIZE 256
/**
* @brief qed_hw_err_notify - Notify upper layer driver and management FW
* about a HW error.
*
* @param p_hwfn
* @param p_ptt
* @param err_type
* @param fmt - debug data buffer to send to the MFW
* @param ... - buffer format args
*/
void qed_hw_err_notify(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
enum qed_hw_err_type err_type, char *fmt, ...);
#endif

View File

@ -2468,6 +2468,35 @@ void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn)
ops->schedule_recovery_handler(cookie);
}
char *qed_hw_err_type_descr[] = {
[QED_HW_ERR_FAN_FAIL] = "Fan Failure",
[QED_HW_ERR_MFW_RESP_FAIL] = "MFW Response Failure",
[QED_HW_ERR_HW_ATTN] = "HW Attention",
[QED_HW_ERR_DMAE_FAIL] = "DMAE Failure",
[QED_HW_ERR_RAMROD_FAIL] = "Ramrod Failure",
[QED_HW_ERR_FW_ASSERT] = "FW Assertion",
[QED_HW_ERR_LAST] = "Unknown",
};
void qed_hw_error_occurred(struct qed_hwfn *p_hwfn,
enum qed_hw_err_type err_type)
{
struct qed_common_cb_ops *ops = p_hwfn->cdev->protocol_ops.common;
void *cookie = p_hwfn->cdev->ops_cookie;
char *err_str;
if (err_type > QED_HW_ERR_LAST)
err_type = QED_HW_ERR_LAST;
err_str = qed_hw_err_type_descr[err_type];
DP_NOTICE(p_hwfn, "HW error occurred [%s]\n", err_str);
/* Call the HW error handler of the protocol driver
*/
if (ops && ops->schedule_hw_err_handler)
ops->schedule_hw_err_handler(cookie, err_type);
}
static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
void *handle)
{

View File

@ -607,6 +607,16 @@ struct qed_sb_info {
struct qed_dev *cdev;
};
enum qed_hw_err_type {
QED_HW_ERR_FAN_FAIL,
QED_HW_ERR_MFW_RESP_FAIL,
QED_HW_ERR_HW_ATTN,
QED_HW_ERR_DMAE_FAIL,
QED_HW_ERR_RAMROD_FAIL,
QED_HW_ERR_FW_ASSERT,
QED_HW_ERR_LAST,
};
enum qed_dev_type {
QED_DEV_TYPE_BB,
QED_DEV_TYPE_AH,
@ -814,6 +824,8 @@ struct qed_common_cb_ops {
void (*link_update)(void *dev,
struct qed_link_output *link);
void (*schedule_recovery_handler)(void *dev);
void (*schedule_hw_err_handler)(void *dev,
enum qed_hw_err_type err_type);
void (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type);
void (*get_generic_tlv_data)(void *dev, struct qed_generic_tlvs *data);
void (*get_protocol_tlv_data)(void *dev, void *data);