bnxt_en: Fix the PCI-AER routines

[ Upstream commit a1acdc226b ]

We do not support two simultaneous recoveries so check for reset
flag, BNXT_STATE_IN_FW_RESET, and do not proceed with AER further.
When the pci channel state is pci_channel_io_frozen, the PCIe link
can not be trusted so we disable the traffic immediately and stop
BAR access by calling bnxt_fw_fatal_close().  BAR access after
AER fatal error can cause an NMI.

Fixes: f75d9a0aa9 ("bnxt_en: Re-write PCI BARs after PCI fatal error.")
Signed-off-by: Vikas Gupta <vikas.gupta@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
Vikas Gupta 2024-04-19 11:34:48 -07:00 committed by Greg Kroah-Hartman
parent b018e90cc1
commit 2e34f9d681
1 changed files with 16 additions and 3 deletions

View File

@ -15045,6 +15045,7 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev,
{
struct net_device *netdev = pci_get_drvdata(pdev);
struct bnxt *bp = netdev_priv(netdev);
bool abort = false;
netdev_info(netdev, "PCI I/O error detected\n");
@ -15053,16 +15054,27 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev,
bnxt_ulp_stop(bp);
if (state == pci_channel_io_perm_failure) {
if (test_and_set_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) {
netdev_err(bp->dev, "Firmware reset already in progress\n");
abort = true;
}
if (abort || state == pci_channel_io_perm_failure) {
rtnl_unlock();
return PCI_ERS_RESULT_DISCONNECT;
}
if (state == pci_channel_io_frozen)
/* Link is not reliable anymore if state is pci_channel_io_frozen
* so we disable bus master to prevent any potential bad DMAs before
* freeing kernel memory.
*/
if (state == pci_channel_io_frozen) {
set_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state);
bnxt_fw_fatal_close(bp);
}
if (netif_running(netdev))
bnxt_close(netdev);
__bnxt_close_nic(bp, true, true);
if (pci_is_enabled(pdev))
pci_disable_device(pdev);
@ -15146,6 +15158,7 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev)
}
reset_exit:
clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
bnxt_clear_reservations(bp, true);
rtnl_unlock();