Merge branch 'pci/err'

- Recognize disconnected devices so we don't bother trying to set them to
  "frozen" or "normal" state (Christoph Hellwig)

- Clear PCI Status register during enumeration in case firmware left errors
  logged (Kai-Heng Feng)

- Configure ECRC for every device, including hot-added ones (Stefan Roese)

- Keep AER error reporting enabled for switches (Stefan Roese)

- Enable error reporting for all devices that support AER (Stefan Roese)

- Iterate over error counters instead of error strings to avoid printing
  junk in AER sysfs counters (Mohamed Khalfella)

* pci/err:
  PCI/AER: Iterate over error counters instead of error strings
  PCI/AER: Enable error reporting when AER is native
  PCI/portdrv: Don't disable AER reporting in get_port_device_capability()
  PCI/AER: Configure ECRC for every device
  PCI: Clear PCI_STATUS when setting up device
  PCI/ERR: Recognize disconnected devices in report_error_detected()
This commit is contained in:
Bjorn Helgaas 2022-08-04 11:41:52 -05:00
commit 5a20930f27
4 changed files with 23 additions and 16 deletions

View File

@ -392,6 +392,11 @@ void pci_aer_init(struct pci_dev *dev)
pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_ERR, sizeof(u32) * n);
pci_aer_clear_status(dev);
if (pci_aer_available())
pci_enable_pcie_error_reporting(dev);
pcie_set_ecrc_checking(dev);
}
void pci_aer_exit(struct pci_dev *dev)
@ -538,7 +543,7 @@ static const char *aer_agent_string[] = {
u64 *stats = pdev->aer_stats->stats_array; \
size_t len = 0; \
\
for (i = 0; i < ARRAY_SIZE(strings_array); i++) { \
for (i = 0; i < ARRAY_SIZE(pdev->aer_stats->stats_array); i++) {\
if (strings_array[i]) \
len += sysfs_emit_at(buf, len, "%s %llu\n", \
strings_array[i], \
@ -1228,9 +1233,6 @@ static int set_device_error_reporting(struct pci_dev *dev, void *data)
pci_disable_pcie_error_reporting(dev);
}
if (enable)
pcie_set_ecrc_checking(dev);
return 0;
}
@ -1347,6 +1349,11 @@ static int aer_probe(struct pcie_device *dev)
struct device *device = &dev->device;
struct pci_dev *port = dev->port;
BUILD_BUG_ON(ARRAY_SIZE(aer_correctable_error_string) <
AER_MAX_TYPEOF_COR_ERRS);
BUILD_BUG_ON(ARRAY_SIZE(aer_uncorrectable_error_string) <
AER_MAX_TYPEOF_UNCOR_ERRS);
/* Limit to Root Ports or Root Complex Event Collectors */
if ((pci_pcie_type(port) != PCI_EXP_TYPE_RC_EC) &&
(pci_pcie_type(port) != PCI_EXP_TYPE_ROOT_PORT))

View File

@ -55,10 +55,14 @@ static int report_error_detected(struct pci_dev *dev,
device_lock(&dev->dev);
pdrv = dev->driver;
if (!pci_dev_set_io_state(dev, state) ||
!pdrv ||
!pdrv->err_handler ||
!pdrv->err_handler->error_detected) {
if (pci_dev_is_disconnected(dev)) {
vote = PCI_ERS_RESULT_DISCONNECT;
} else if (!pci_dev_set_io_state(dev, state)) {
pci_info(dev, "can't recover (state transition %u -> %u invalid)\n",
dev->error_state, state);
vote = PCI_ERS_RESULT_NONE;
} else if (!pdrv || !pdrv->err_handler ||
!pdrv->err_handler->error_detected) {
/*
* If any device in the subtree does not have an error_detected
* callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent

View File

@ -222,15 +222,8 @@ static int get_port_device_capability(struct pci_dev *dev)
#ifdef CONFIG_PCIEAER
if (dev->aer_cap && pci_aer_available() &&
(pcie_ports_native || host->native_aer)) {
(pcie_ports_native || host->native_aer))
services |= PCIE_PORT_SERVICE_AER;
/*
* Disable AER on this port in case it's been enabled by the
* BIOS (the AER service driver will enable it when necessary).
*/
pci_disable_pcie_error_reporting(dev);
}
#endif
/* Root Ports and Root Complex Event Collectors may generate PMEs */

View File

@ -1890,6 +1890,9 @@ int pci_setup_device(struct pci_dev *dev)
dev->broken_intx_masking = pci_intx_mask_broken(dev);
/* Clear errors left from system firmware */
pci_write_config_word(dev, PCI_STATUS, 0xffff);
switch (dev->hdr_type) { /* header type */
case PCI_HEADER_TYPE_NORMAL: /* standard header */
if (class == PCI_CLASS_BRIDGE_PCI)