Merge branch 'pci/error'

- Use pci_channel_state_t instead of enum pci_channel_state (Luc Van
  Oostenryck)

- Simplify __aer_print_error() (Bjorn Helgaas)

- Log AER correctable errors as warning, not error (Matt Jolly)

- Rename pci_aer_clear_device_status() to pcie_clear_device_status() (Bjorn
  Helgaas)

- Clear PCIe Device Status errors only if OS owns AER (Jonathan Cameron)

* pci/error:
  PCI/ERR: Clear PCIe Device Status errors only if OS owns AER
  PCI/ERR: Rename pci_aer_clear_device_status() to pcie_clear_device_status()
  PCI/AER: Log correctable errors as warning, not error
  PCI/AER: Simplify __aer_print_error()
  PCI: Use 'pci_channel_state_t' instead of 'enum pci_channel_state'
This commit is contained in:
Bjorn Helgaas 2020-08-05 18:24:15 -05:00
commit b0735e8d2c
20 changed files with 83 additions and 57 deletions

View file

@ -79,7 +79,7 @@ This structure has the form::
struct pci_error_handlers
{
int (*error_detected)(struct pci_dev *dev, enum pci_channel_state);
int (*error_detected)(struct pci_dev *dev, pci_channel_state_t);
int (*mmio_enabled)(struct pci_dev *dev);
int (*slot_reset)(struct pci_dev *dev);
void (*resume)(struct pci_dev *dev);
@ -87,11 +87,11 @@ This structure has the form::
The possible channel states are::
enum pci_channel_state {
typedef enum {
pci_channel_io_normal, /* I/O channel is in normal state */
pci_channel_io_frozen, /* I/O to channel is blocked */
pci_channel_io_perm_failure, /* PCI card is dead */
};
} pci_channel_state_t;
Possible return values are::
@ -348,7 +348,7 @@ STEP 6: Permanent Failure
-------------------------
A "permanent failure" has occurred, and the platform cannot recover
the device. The platform will call error_detected() with a
pci_channel_state value of pci_channel_io_perm_failure.
pci_channel_state_t value of pci_channel_io_perm_failure.
The device driver should, at this point, assume the worst. It should
cancel all pending I/O, refuse all new I/O, returning -EIO to

View file

@ -214,7 +214,7 @@ static void eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
pci_save_state(pdev);
}
static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s)
static void eeh_set_channel_state(struct eeh_pe *root, pci_channel_state_t s)
{
struct eeh_pe *pe;
struct eeh_dev *edev, *tmp;

View file

@ -625,7 +625,7 @@ static int rsxx_eeh_fifo_flush_poll(struct rsxx_cardinfo *card)
}
static pci_ers_result_t rsxx_error_detected(struct pci_dev *dev,
enum pci_channel_state error)
pci_channel_state_t error)
{
int st;

View file

@ -1267,7 +1267,7 @@ static void ioat_resume(struct ioatdma_device *ioat_dma)
#define DRV_NAME "ioatdma"
static pci_ers_result_t ioat_pcie_error_detected(struct pci_dev *pdev,
enum pci_channel_state error)
pci_channel_state_t error)
{
dev_dbg(&pdev->dev, "%s: PCIe AER error %d\n", DRV_NAME, error);

View file

@ -1186,7 +1186,7 @@ MODULE_DEVICE_TABLE(pci, ngene_id_tbl);
/****************************************************************************/
static pci_ers_result_t ngene_error_detected(struct pci_dev *dev,
enum pci_channel_state state)
pci_channel_state_t state)
{
dev_err(&dev->dev, "PCI error\n");
if (state == pci_channel_io_perm_failure)

View file

@ -1240,7 +1240,7 @@ static void genwqe_remove(struct pci_dev *pci_dev)
* error is detected.
*/
static pci_ers_result_t genwqe_err_error_detected(struct pci_dev *pci_dev,
enum pci_channel_state state)
pci_channel_state_t state)
{
struct genwqe_dev *cd;

View file

@ -15465,7 +15465,7 @@ static void i40e_remove(struct pci_dev *pdev)
* remediation.
**/
static pci_ers_result_t i40e_pci_error_detected(struct pci_dev *pdev,
enum pci_channel_state error)
pci_channel_state_t error)
{
struct i40e_pf *pf = pci_get_drvdata(pdev);

View file

@ -3586,7 +3586,7 @@ static void ice_remove(struct pci_dev *pdev)
* is in progress. Allows the driver to gracefully prepare/handle PCI errors.
*/
static pci_ers_result_t
ice_pci_err_detected(struct pci_dev *pdev, enum pci_channel_state err)
ice_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t err)
{
struct ice_pf *pf = pci_get_drvdata(pdev);

View file

@ -82,7 +82,7 @@ static int ixgb_vlan_rx_kill_vid(struct net_device *netdev,
static void ixgb_restore_vlan(struct ixgb_adapter *adapter);
static pci_ers_result_t ixgb_io_error_detected (struct pci_dev *pdev,
enum pci_channel_state state);
pci_channel_state_t state);
static pci_ers_result_t ixgb_io_slot_reset (struct pci_dev *pdev);
static void ixgb_io_resume (struct pci_dev *pdev);
@ -2194,7 +2194,7 @@ ixgb_restore_vlan(struct ixgb_adapter *adapter)
* a PCI bus error is detected.
*/
static pci_ers_result_t ixgb_io_error_detected(struct pci_dev *pdev,
enum pci_channel_state state)
pci_channel_state_t state)
{
struct net_device *netdev = pci_get_drvdata(pdev);
struct ixgb_adapter *adapter = netdev_priv(netdev);

View file

@ -1519,7 +1519,7 @@ static const struct dev_pm_ops efx_pm_ops = {
* Stop the software path and request a slot reset.
*/
static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev,
enum pci_channel_state state)
pci_channel_state_t state)
{
pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
struct efx_nic *efx = pci_get_drvdata(pdev);

View file

@ -3118,7 +3118,7 @@ static const struct dev_pm_ops ef4_pm_ops = {
* Stop the software path and request a slot reset.
*/
static pci_ers_result_t ef4_io_error_detected(struct pci_dev *pdev,
enum pci_channel_state state)
pci_channel_state_t state)
{
pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
struct ef4_nic *efx = pci_get_drvdata(pdev);

View file

@ -2173,6 +2173,14 @@ int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
}
EXPORT_SYMBOL_GPL(pci_set_pcie_reset_state);
/*
 * Read the PCIe Device Status register of @dev and write the value that was
 * read straight back to the same register.
 *
 * NOTE(review): presumably the status bits are write-1-to-clear, so the
 * write-back clears whichever bits were set -- confirm against the PCIe spec.
 * The results of both capability accessors are ignored.
 */
void pcie_clear_device_status(struct pci_dev *dev)
{
	u16 devsta;

	pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &devsta);
	pcie_capability_write_word(dev, PCI_EXP_DEVSTA, devsta);
}
/**
* pcie_clear_root_pme_status - Clear root port PME interrupt status.
* @dev: PCIe root port or event collector.

View file

@ -92,6 +92,7 @@ void pci_refresh_power_state(struct pci_dev *dev);
int pci_power_up(struct pci_dev *dev);
void pci_disable_enabled_device(struct pci_dev *dev);
int pci_finish_runtime_suspend(struct pci_dev *dev);
void pcie_clear_device_status(struct pci_dev *dev);
void pcie_clear_root_pme_status(struct pci_dev *dev);
bool pci_check_pme_status(struct pci_dev *dev);
void pci_pme_wakeup_bus(struct pci_bus *bus);
@ -555,7 +556,7 @@ static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev)
/* PCI error reporting and recovery */
pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
enum pci_channel_state state,
pci_channel_state_t state,
pci_ers_result_t (*reset_link)(struct pci_dev *pdev));
bool pcie_wait_for_link(struct pci_dev *pdev, bool active);
@ -658,7 +659,6 @@ void pci_aer_init(struct pci_dev *dev);
void pci_aer_exit(struct pci_dev *dev);
extern const struct attribute_group aer_stats_attr_group;
void pci_aer_clear_fatal_status(struct pci_dev *dev);
void pci_aer_clear_device_status(struct pci_dev *dev);
int pci_aer_clear_status(struct pci_dev *dev);
int pci_aer_raw_clear_status(struct pci_dev *dev);
#else
@ -666,7 +666,6 @@ static inline void pci_no_aer(void) { }
static inline void pci_aer_init(struct pci_dev *d) { }
static inline void pci_aer_exit(struct pci_dev *d) { }
static inline void pci_aer_clear_fatal_status(struct pci_dev *dev) { }
static inline void pci_aer_clear_device_status(struct pci_dev *dev) { }
static inline int pci_aer_clear_status(struct pci_dev *dev) { return -EINVAL; }
static inline int pci_aer_raw_clear_status(struct pci_dev *dev) { return -EINVAL; }
#endif

View file

@ -241,14 +241,6 @@ int pci_disable_pcie_error_reporting(struct pci_dev *dev)
}
EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting);
/*
 * Read the PCIe Device Status register of @dev and write the same value back.
 *
 * NOTE(review): presumably the status bits are write-1-to-clear, so this
 * clears whatever was set -- confirm against the PCIe spec.  The return
 * values of both capability accessors are ignored.
 */
void pci_aer_clear_device_status(struct pci_dev *dev)
{
u16 sta;
pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta);
/* Write back exactly what was read from PCI_EXP_DEVSTA. */
pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta);
}
int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
{
int aer = dev->aer_cap;
@ -447,7 +439,7 @@ static const char *aer_error_layer[] = {
"Transaction Layer"
};
static const char *aer_correctable_error_string[AER_MAX_TYPEOF_COR_ERRS] = {
static const char *aer_correctable_error_string[] = {
"RxErr", /* Bit Position 0 */
NULL,
NULL,
@ -464,9 +456,25 @@ static const char *aer_correctable_error_string[AER_MAX_TYPEOF_COR_ERRS] = {
"NonFatalErr", /* Bit Position 13 */
"CorrIntErr", /* Bit Position 14 */
"HeaderOF", /* Bit Position 15 */
NULL, /* Bit Position 16 */
NULL, /* Bit Position 17 */
NULL, /* Bit Position 18 */
NULL, /* Bit Position 19 */
NULL, /* Bit Position 20 */
NULL, /* Bit Position 21 */
NULL, /* Bit Position 22 */
NULL, /* Bit Position 23 */
NULL, /* Bit Position 24 */
NULL, /* Bit Position 25 */
NULL, /* Bit Position 26 */
NULL, /* Bit Position 27 */
NULL, /* Bit Position 28 */
NULL, /* Bit Position 29 */
NULL, /* Bit Position 30 */
NULL, /* Bit Position 31 */
};
static const char *aer_uncorrectable_error_string[AER_MAX_TYPEOF_UNCOR_ERRS] = {
static const char *aer_uncorrectable_error_string[] = {
"Undefined", /* Bit Position 0 */
NULL,
NULL,
@ -494,6 +502,11 @@ static const char *aer_uncorrectable_error_string[AER_MAX_TYPEOF_UNCOR_ERRS] = {
"AtomicOpBlocked", /* Bit Position 24 */
"TLPBlockedErr", /* Bit Position 25 */
"PoisonTLPBlocked", /* Bit Position 26 */
NULL, /* Bit Position 27 */
NULL, /* Bit Position 28 */
NULL, /* Bit Position 29 */
NULL, /* Bit Position 30 */
NULL, /* Bit Position 31 */
};
static const char *aer_agent_string[] = {
@ -650,24 +663,26 @@ static void __print_tlp_header(struct pci_dev *dev,
static void __aer_print_error(struct pci_dev *dev,
struct aer_err_info *info)
{
const char **strings;
unsigned long status = info->status & ~info->mask;
const char *errmsg = NULL;
const char *level, *errmsg;
int i;
for_each_set_bit(i, &status, 32) {
if (info->severity == AER_CORRECTABLE)
errmsg = i < ARRAY_SIZE(aer_correctable_error_string) ?
aer_correctable_error_string[i] : NULL;
else
errmsg = i < ARRAY_SIZE(aer_uncorrectable_error_string) ?
aer_uncorrectable_error_string[i] : NULL;
if (info->severity == AER_CORRECTABLE) {
strings = aer_correctable_error_string;
level = KERN_WARNING;
} else {
strings = aer_uncorrectable_error_string;
level = KERN_ERR;
}
if (errmsg)
pci_err(dev, " [%2d] %-22s%s\n", i, errmsg,
for_each_set_bit(i, &status, 32) {
errmsg = strings[i];
if (!errmsg)
errmsg = "Unknown Error Bit";
pci_printk(level, dev, " [%2d] %-22s%s\n", i, errmsg,
info->first_error == i ? " (First)" : "");
else
pci_err(dev, " [%2d] Unknown Error Bit%s\n",
i, info->first_error == i ? " (First)" : "");
}
pci_dev_aer_stats_incr(dev, info);
}
@ -676,6 +691,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
{
int layer, agent;
int id = ((dev->bus->number << 8) | dev->devfn);
const char *level;
if (!info->status) {
pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
@ -686,13 +702,14 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
layer = AER_GET_LAYER_ERROR(info->severity, info->status);
agent = AER_GET_AGENT(info->severity, info->status);
pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
aer_error_severity_string[info->severity],
aer_error_layer[layer], aer_agent_string[agent]);
level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR;
pci_err(dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
dev->vendor, dev->device,
info->status, info->mask);
pci_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
aer_error_severity_string[info->severity],
aer_error_layer[layer], aer_agent_string[agent]);
pci_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
dev->vendor, dev->device, info->status, info->mask);
__aer_print_error(dev, info);
@ -922,7 +939,8 @@ static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
if (aer)
pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS,
info->status);
pci_aer_clear_device_status(dev);
if (pcie_aer_is_native(dev))
pcie_clear_device_status(dev);
} else if (info->severity == AER_NONFATAL)
pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset);
else if (info->severity == AER_FATAL)

View file

@ -46,7 +46,7 @@ static pci_ers_result_t merge_result(enum pci_ers_result orig,
}
static int report_error_detected(struct pci_dev *dev,
enum pci_channel_state state,
pci_channel_state_t state,
enum pci_ers_result *result)
{
pci_ers_result_t vote;
@ -147,7 +147,7 @@ static int report_resume(struct pci_dev *dev, void *data)
}
pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
enum pci_channel_state state,
pci_channel_state_t state,
pci_ers_result_t (*reset_link)(struct pci_dev *pdev))
{
pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
@ -197,7 +197,8 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
pci_dbg(dev, "broadcast resume message\n");
pci_walk_bus(bus, report_resume, &status);
pci_aer_clear_device_status(dev);
if (pcie_aer_is_native(dev))
pcie_clear_device_status(dev);
pci_aer_clear_nonfatal_status(dev);
pci_info(dev, "device recovery successful\n");
return status;

View file

@ -146,7 +146,7 @@ static void pcie_portdrv_remove(struct pci_dev *dev)
}
static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev,
enum pci_channel_state error)
pci_channel_state_t error)
{
/* Root Port has no impact. Always recovers. */
return PCI_ERS_RESULT_CAN_RECOVER;

View file

@ -2002,7 +2002,7 @@ static void aac_remove_one(struct pci_dev *pdev)
}
static pci_ers_result_t aac_pci_error_detected(struct pci_dev *pdev,
enum pci_channel_state error)
pci_channel_state_t error)
{
struct Scsi_Host *shost = pci_get_drvdata(pdev);
struct aac_dev *aac = shost_priv(shost);

View file

@ -1743,7 +1743,7 @@ static void sym2_remove(struct pci_dev *pdev)
* @state: current state of the PCI slot
*/
static pci_ers_result_t sym2_io_error_detected(struct pci_dev *pdev,
enum pci_channel_state state)
pci_channel_state_t state)
{
/* If slot is permanently frozen, turn everything off */
if (state == pci_channel_io_perm_failure) {

View file

@ -4678,7 +4678,7 @@ static void ql_eeh_close(struct net_device *ndev)
* a PCI bus error is detected.
*/
static pci_ers_result_t qlge_io_error_detected(struct pci_dev *pdev,
enum pci_channel_state state)
pci_channel_state_t state)
{
struct net_device *ndev = pci_get_drvdata(pdev);
struct ql_adapter *qdev = netdev_priv(ndev);

View file

@ -179,7 +179,7 @@ static inline const char *pci_power_name(pci_power_t state)
*/
typedef unsigned int __bitwise pci_channel_state_t;
enum pci_channel_state {
enum {
/* I/O channel is in normal state */
pci_channel_io_normal = (__force pci_channel_state_t) 1,
@ -792,7 +792,7 @@ enum pci_ers_result {
struct pci_error_handlers {
/* PCI bus error detected on this device */
pci_ers_result_t (*error_detected)(struct pci_dev *dev,
enum pci_channel_state error);
pci_channel_state_t error);
/* MMIO has been re-enabled, but not DMA */
pci_ers_result_t (*mmio_enabled)(struct pci_dev *dev);