Merge branch 'bnxt_en-aer-fixes'

Michael Chan says:

====================
bnxt_en: AER fixes

This patchset fixes issues in the AER recovery logic.  The first patch
refactors the code to make a shutdown function available for AER fatal
errors.  The second patch fixes the AER fatal recovery logic.  The
third patch fixes the health register logic to fix AER recovery failure
for the new P7 chips.
====================

Signed-off-by: Arınç ÜNAL <arinc.unal@arinc9.com>
This commit is contained in:
David S. Miller 2024-04-22 14:13:19 +01:00
commit 9e91bf75dd
1 changed files with 28 additions and 10 deletions

View File

@ -9089,7 +9089,7 @@ static void bnxt_try_map_fw_health_reg(struct bnxt *bp)
BNXT_FW_HEALTH_WIN_BASE +
BNXT_GRC_REG_CHIP_NUM);
}
if (!BNXT_CHIP_P5(bp))
if (!BNXT_CHIP_P5_PLUS(bp))
return;
status_loc = BNXT_GRC_REG_STATUS_P5 |
@ -13037,6 +13037,16 @@ static void bnxt_rx_ring_reset(struct bnxt *bp)
bnxt_rtnl_unlock_sp(bp);
}
static void bnxt_fw_fatal_close(struct bnxt *bp)
{
bnxt_tx_disable(bp);
bnxt_disable_napi(bp);
bnxt_disable_int_sync(bp);
bnxt_free_irq(bp);
bnxt_clear_int_mode(bp);
pci_disable_device(bp->pdev);
}
static void bnxt_fw_reset_close(struct bnxt *bp)
{
bnxt_ulp_stop(bp);
@ -13050,12 +13060,7 @@ static void bnxt_fw_reset_close(struct bnxt *bp)
pci_read_config_word(bp->pdev, PCI_SUBSYSTEM_ID, &val);
if (val == 0xffff)
bp->fw_reset_min_dsecs = 0;
bnxt_tx_disable(bp);
bnxt_disable_napi(bp);
bnxt_disable_int_sync(bp);
bnxt_free_irq(bp);
bnxt_clear_int_mode(bp);
pci_disable_device(bp->pdev);
bnxt_fw_fatal_close(bp);
}
__bnxt_close_nic(bp, true, false);
bnxt_vf_reps_free(bp);
@ -15373,6 +15378,7 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev,
{
struct net_device *netdev = pci_get_drvdata(pdev);
struct bnxt *bp = netdev_priv(netdev);
bool abort = false;
netdev_info(netdev, "PCI I/O error detected\n");
@ -15381,16 +15387,27 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev,
bnxt_ulp_stop(bp);
if (state == pci_channel_io_perm_failure) {
if (test_and_set_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) {
netdev_err(bp->dev, "Firmware reset already in progress\n");
abort = true;
}
if (abort || state == pci_channel_io_perm_failure) {
rtnl_unlock();
return PCI_ERS_RESULT_DISCONNECT;
}
if (state == pci_channel_io_frozen)
/* Link is not reliable anymore if state is pci_channel_io_frozen
* so we disable bus master to prevent any potential bad DMAs before
* freeing kernel memory.
*/
if (state == pci_channel_io_frozen) {
set_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state);
bnxt_fw_fatal_close(bp);
}
if (netif_running(netdev))
bnxt_close(netdev);
__bnxt_close_nic(bp, true, true);
if (pci_is_enabled(pdev))
pci_disable_device(pdev);
@ -15474,6 +15491,7 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev)
}
reset_exit:
clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
bnxt_clear_reservations(bp, true);
rtnl_unlock();