linux-stable/drivers/pci/pcie/dpc.c

// SPDX-License-Identifier: GPL-2.0
/*
* PCI Express Downstream Port Containment services driver
* Author: Keith Busch <keith.busch@intel.com>
*
* Copyright (C) 2016 Intel Corp.
*/
#define dev_fmt(fmt) "DPC: " fmt
#include <linux/aer.h>
#include <linux/bitfield.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/pci.h>
#include "portdrv.h"
#include "../pci.h"

#define PCI_EXP_DPC_CTL_EN_MASK	(PCI_EXP_DPC_CTL_EN_FATAL | \
					 PCI_EXP_DPC_CTL_EN_NONFATAL)
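
/*
 * Human-readable strings for the RP PIO error status bits, indexed by
 * bit position.  NULL entries mark reserved bit positions.
 */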
static const char * const rp_pio_error_string[] = {
"Configuration Request received UR Completion", /* Bit Position 0 */
"Configuration Request received CA Completion", /* Bit Position 1 */
"Configuration Request Completion Timeout", /* Bit Position 2 */
NULL,
NULL,
NULL,
NULL,
NULL,
"I/O Request received UR Completion", /* Bit Position 8 */
"I/O Request received CA Completion", /* Bit Position 9 */
"I/O Request Completion Timeout", /* Bit Position 10 */
NULL,
NULL,
NULL,
NULL,
NULL,
"Memory Request received UR Completion", /* Bit Position 16 */
"Memory Request received CA Completion", /* Bit Position 17 */
"Memory Request Completion Timeout", /* Bit Position 18 */
};
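
/*
 * Save the DPC Control register so that it can be restored after the
 * device is reset or resumed.
 */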
void pci_save_dpc_state(struct pci_dev *dev)
{
	struct pci_cap_saved_state *save_state;
	u16 *cap;

	if (!pci_is_pcie(dev))
		return;

	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC);
	if (!save_state)
		return;

	cap = (u16 *)&save_state->cap.data[0];
	pci_read_config_word(dev, dev->dpc_cap + PCI_EXP_DPC_CTL, cap);
}
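
/*
 * Restore the previously saved DPC Control register, re-enabling DPC
 * with the same settings it had before the reset or suspend.
 */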
void pci_restore_dpc_state(struct pci_dev *dev)
{
	struct pci_cap_saved_state *save_state;
	u16 *cap;

	if (!pci_is_pcie(dev))
		return;

	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC);
	if (!save_state)
		return;

	cap = (u16 *)&save_state->cap.data[0];
	pci_write_config_word(dev, dev->dpc_cap + PCI_EXP_DPC_CTL, *cap);
}
static DECLARE_WAIT_QUEUE_HEAD(dpc_completed_waitqueue);
#ifdef CONFIG_HOTPLUG_PCI_PCIE
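
/*
 * DPC is complete once the DPC Trigger Status bit has cleared (or the
 * port's config space has become inaccessible) and dpc_reset_link() has
 * cleared the PCI_DPC_RECOVERING flag.
 */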
static bool dpc_completed(struct pci_dev *pdev)
{
	u16 status;

	pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_STATUS, &status);
	if ((!PCI_POSSIBLE_ERROR(status)) && (status & PCI_EXP_DPC_STATUS_TRIGGER))
		return false;

	if (test_bit(PCI_DPC_RECOVERING, &pdev->priv_flags))
		return false;

	return true;
}
/**
 * pci_dpc_recovered - whether DPC triggered and has recovered successfully
 * @pdev: PCI device
 *
 * Return true if DPC was triggered for @pdev and has recovered successfully.
 * Wait for recovery if it hasn't completed yet. Called from the PCIe hotplug
 * driver to recognize and ignore Link Down/Up events caused by DPC.
 */
bool pci_dpc_recovered(struct pci_dev *pdev)
{
	struct pci_host_bridge *host;

	if (!pdev->dpc_cap)
		return false;

	/*
	 * Synchronization between hotplug and DPC is not supported
	 * if DPC is owned by firmware and EDR is not enabled.
	 */
	host = pci_find_host_bridge(pdev->bus);
	if (!host->native_dpc && !IS_ENABLED(CONFIG_PCIE_EDR))
		return false;

	/*
	 * Need a timeout in case DPC never completes due to failure of
	 * dpc_wait_rp_inactive(). The spec doesn't mandate a time limit,
	 * but reports indicate that DPC completes within 4 seconds.
	 */
	wait_event_timeout(dpc_completed_waitqueue, dpc_completed(pdev),
			   msecs_to_jiffies(4000));

	return test_and_clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags);
}
#endif /* CONFIG_HOTPLUG_PCI_PCIE */
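
/*
 * On Root Ports with RP Extensions for DPC, poll the DPC RP Busy bit for
 * up to 1 second while the Root Port finishes processing; give up with
 * -EBUSY if it never goes idle.
 */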
static int dpc_wait_rp_inactive(struct pci_dev *pdev)
{
	unsigned long timeout = jiffies + HZ;
	u16 cap = pdev->dpc_cap, status;

	pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
	while (status & PCI_EXP_DPC_RP_BUSY &&
	       !time_after(jiffies, timeout)) {
		msleep(10);
		pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
	}
	if (status & PCI_EXP_DPC_RP_BUSY) {
		pci_warn(pdev, "root port still busy\n");
		return -EBUSY;
	}
	return 0;
}
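
/*
 * Wait for the Link Down caused by DPC and release the Port from
 * containment.  Sets PCI_DPC_RECOVERING while recovery is in progress and
 * PCI_DPC_RECOVERED on success, so that pciehp can recognize and ignore
 * the resulting Link Down/Up events (see pci_dpc_recovered()).
 */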
pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
{
	pci_ers_result_t ret;
	u16 cap;
	set_bit(PCI_DPC_RECOVERING, &pdev->priv_flags);

	/*
	 * DPC disables the Link automatically in hardware, so it has
	 * already been reset by the time we get here.
	 */
	cap = pdev->dpc_cap;

	/*
	 * Wait until the Link is inactive, then clear DPC Trigger Status
	 * to allow the Port to leave DPC.
	 */
	if (!pcie_wait_for_link(pdev, false))
		pci_info(pdev, "Data Link Layer Link Active not cleared in 1000 msec\n");
	if (pdev->dpc_rp_extensions && dpc_wait_rp_inactive(pdev)) {
		clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags);
		ret = PCI_ERS_RESULT_DISCONNECT;
		goto out;
	}

	pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,
			      PCI_EXP_DPC_STATUS_TRIGGER);

	if (pci_bridge_wait_for_secondary_bus(pdev, "DPC")) {
		clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags);
		ret = PCI_ERS_RESULT_DISCONNECT;
	} else {
		set_bit(PCI_DPC_RECOVERED, &pdev->priv_flags);
		ret = PCI_ERS_RESULT_RECOVERED;
	}
out:
	clear_bit(PCI_DPC_RECOVERING, &pdev->priv_flags);
	wake_up_all(&dpc_completed_waitqueue);
	return ret;
}
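
/*
 * Log the RP PIO error status registers and, if the RP PIO log is large
 * enough, dump the captured TLP header, ImpSpec log and TLP prefixes.
 */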
static void dpc_process_rp_pio_error(struct pci_dev *pdev)
{
	u16 cap = pdev->dpc_cap, dpc_status, first_error;
	u32 status, mask, sev, syserr, exc, log, prefix;
	struct pcie_tlp_log tlp_log;
	int i;

	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, &status);
	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_MASK, &mask);
	pci_err(pdev, "rp_pio_status: %#010x, rp_pio_mask: %#010x\n",
		status, mask);

	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_SEVERITY, &sev);
	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_SYSERROR, &syserr);
	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_EXCEPTION, &exc);
	pci_err(pdev, "RP PIO severity=%#010x, syserror=%#010x, exception=%#010x\n",
		sev, syserr, exc);

	/* Get First Error Pointer */
	pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &dpc_status);
	first_error = FIELD_GET(PCI_EXP_DPC_RP_PIO_FEP, dpc_status);

	for (i = 0; i < ARRAY_SIZE(rp_pio_error_string); i++) {
		if ((status & ~mask) & (1 << i))
			pci_err(pdev, "[%2d] %s%s\n", i, rp_pio_error_string[i],
				first_error == i ? " (First)" : "");
	}

	if (pdev->dpc_rp_log_size < 4)
		goto clear_status;
	pcie_read_tlp_log(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG, &tlp_log);
	pci_err(pdev, "TLP Header: %#010x %#010x %#010x %#010x\n",
		tlp_log.dw[0], tlp_log.dw[1], tlp_log.dw[2], tlp_log.dw[3]);

	if (pdev->dpc_rp_log_size < 5)
		goto clear_status;
	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_IMPSPEC_LOG, &log);
	pci_err(pdev, "RP PIO ImpSpec Log %#010x\n", log);

	for (i = 0; i < pdev->dpc_rp_log_size - 5; i++) {
		pci_read_config_dword(pdev,
			cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG + i * 4, &prefix);
		pci_err(pdev, "TLP Prefix Header: dw%d, %#010x\n", i, prefix);
	}
 clear_status:
	pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, status);
}
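
/*
 * Check the AER registers for an unmasked uncorrectable error and set the
 * severity to AER_FATAL or AER_NONFATAL based on the Uncorrectable Error
 * Severity register.  Returns 0 if no unmasked error is pending.
 */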
static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
					  struct aer_err_info *info)
{
	int pos = dev->aer_cap;
	u32 status, mask, sev;

	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask);
	status &= ~mask;
	if (!status)
		return 0;

	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
	status &= sev;
	if (status)
		info->severity = AER_FATAL;
	else
		info->severity = AER_NONFATAL;

	return 1;
}
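
/*
 * Decode and log the reason for the containment event.  RP PIO errors get
 * detailed logging; unmasked uncorrectable errors are looked up in the
 * AER registers, printed and cleared.
 */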
void dpc_process_error(struct pci_dev *pdev)
{
	u16 cap = pdev->dpc_cap, status, source, reason, ext_reason;
	struct aer_err_info info;

	pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
	pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, &source);
	pci_info(pdev, "containment event, status:%#06x source:%#06x\n",
		 status, source);

	reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN;
	ext_reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT;
	pci_warn(pdev, "%s detected\n",
		 (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR) ?
		 "unmasked uncorrectable error" :
		 (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_NFE) ?
		 "ERR_NONFATAL" :
		 (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE) ?
		 "ERR_FATAL" :
		 (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO) ?
		 "RP PIO error" :
		 (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_SW_TRIGGER) ?
		 "software trigger" :
		 "reserved error");

	/* show RP PIO error detail information */
	if (pdev->dpc_rp_extensions &&
	    reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_IN_EXT &&
	    ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO)
		dpc_process_rp_pio_error(pdev);
	else if (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR &&
		 dpc_get_aer_uncorrect_severity(pdev, &info) &&
		 aer_get_device_error_info(pdev, &info)) {
		aer_print_error(pdev, &info);
		pci_aer_clear_nonfatal_status(pdev);
		pci_aer_clear_fatal_status(pdev);
	}
}
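
/*
 * Clear the error bits left behind by a Surprise Down event so a stale
 * error is not reported the next time some other error is handled.
 */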
static void pci_clear_surpdn_errors(struct pci_dev *pdev)
{
	if (pdev->dpc_rp_extensions)
		pci_write_config_dword(pdev, pdev->dpc_cap +
				       PCI_EXP_DPC_RP_PIO_STATUS, ~0);

	/*
	 * In practice, Surprise Down errors have been observed to also set
	 * error bits in the Status Register as well as the Fatal Error
	 * Detected bit in the Device Status Register.
	 */
	pci_write_config_word(pdev, PCI_STATUS, 0xffff);
	pcie_capability_write_word(pdev, PCI_EXP_DEVSTA, PCI_EXP_DEVSTA_FED);
}
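
/*
 * On async removal, wait for the link to go down, clear the stale error
 * state and release the Port from DPC, then let pciehp tear down the slot.
 */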
static void dpc_handle_surprise_removal(struct pci_dev *pdev)
{
	if (!pcie_wait_for_link(pdev, false)) {
		pci_info(pdev, "Data Link Layer Link Active not cleared in 1000 msec\n");
		goto out;
	}

	if (pdev->dpc_rp_extensions && dpc_wait_rp_inactive(pdev))
		goto out;

	pci_aer_raw_clear_status(pdev);
	pci_clear_surpdn_errors(pdev);

	pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_STATUS,
			      PCI_EXP_DPC_STATUS_TRIGGER);

out:
	clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags);
	wake_up_all(&dpc_completed_waitqueue);
}
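
/*
 * A DPC trigger on a hotplug port with the Surprise Down Error status bit
 * set is treated as async removal rather than as an error: per PCIe r6.0,
 * sec 6.7.6, the error is an expected side effect of the removal.
 */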
static bool dpc_is_surprise_removal(struct pci_dev *pdev)
{
	u16 status;

	if (!pdev->is_hotplug_bridge)
		return false;

	if (pci_read_config_word(pdev, pdev->aer_cap + PCI_ERR_UNCOR_STATUS,
				 &status))
		return false;

	return status & PCI_ERR_UNC_SURPDN;
}
static irqreturn_t dpc_handler(int irq, void *context)
{
	struct pci_dev *pdev = context;

	/*
	 * According to PCIe r6.0 sec 6.7.6, errors are an expected side effect
	 * of async removal and should be ignored by software.
	 */
	if (dpc_is_surprise_removal(pdev)) {
		dpc_handle_surprise_removal(pdev);
		return IRQ_HANDLED;
	}

	dpc_process_error(pdev);

	/* We configure DPC so it only triggers on ERR_FATAL */
	pcie_do_recovery(pdev, pci_channel_io_frozen, dpc_reset_link);

	return IRQ_HANDLED;
}
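
/*
 * Hardirq handler: acknowledge the DPC interrupt and wake the handler
 * thread only if containment was actually triggered.
 */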
static irqreturn_t dpc_irq(int irq, void *context)
{
	struct pci_dev *pdev = context;
	u16 cap = pdev->dpc_cap, status;

	pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);

	if (!(status & PCI_EXP_DPC_STATUS_INTERRUPT) || PCI_POSSIBLE_ERROR(status))
		return IRQ_NONE;

	pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,
			      PCI_EXP_DPC_STATUS_INTERRUPT);
	if (status & PCI_EXP_DPC_STATUS_TRIGGER)
		return IRQ_WAKE_THREAD;
	return IRQ_HANDLED;
}
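
/*
 * Cache the DPC capability offset at enumeration time and, if Root Port
 * Extensions are supported, validate and cache the RP PIO log size.
 */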
void pci_dpc_init(struct pci_dev *pdev)
{
	u16 cap;

	pdev->dpc_cap = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC);
	if (!pdev->dpc_cap)
		return;

	pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CAP, &cap);
	if (!(cap & PCI_EXP_DPC_CAP_RP_EXT))
		return;

	pdev->dpc_rp_extensions = true;

	/* Quirks may set dpc_rp_log_size if device or firmware is buggy */
	if (!pdev->dpc_rp_log_size) {
		pdev->dpc_rp_log_size =
			FIELD_GET(PCI_EXP_DPC_RP_PIO_LOG_SIZE, cap);
		if (pdev->dpc_rp_log_size < 4 || pdev->dpc_rp_log_size > 9) {
			pci_err(pdev, "RP PIO log size %u is invalid\n",
				pdev->dpc_rp_log_size);
			pdev->dpc_rp_log_size = 0;
		}
	}
}
#define FLAG(x, y) (((x) & (y)) ? '+' : '-')
static int dpc_probe(struct pcie_device *dev)
{
	struct pci_dev *pdev = dev->port;
	struct device *device = &dev->device;
	int status;
	u16 ctl, cap;

	if (!pcie_aer_is_native(pdev) && !pcie_ports_dpc_native)
		return -ENOTSUPP;

	status = devm_request_threaded_irq(device, dev->irq, dpc_irq,
					   dpc_handler, IRQF_SHARED,
					   "pcie-dpc", pdev);
	if (status) {
		pci_warn(pdev, "request IRQ%d failed: %d\n", dev->irq,
			 status);
		return status;
	}

	pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CAP, &cap);

	pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, &ctl);
	ctl &= ~PCI_EXP_DPC_CTL_EN_MASK;
	ctl |= PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN;
	pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, ctl);
	pci_info(pdev, "enabled with IRQ %d\n", dev->irq);

	pci_info(pdev, "error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n",
		 cap & PCI_EXP_DPC_IRQ, FLAG(cap, PCI_EXP_DPC_CAP_RP_EXT),
		 FLAG(cap, PCI_EXP_DPC_CAP_POISONED_TLP),
		 FLAG(cap, PCI_EXP_DPC_CAP_SW_TRIGGER), pdev->dpc_rp_log_size,
		 FLAG(cap, PCI_EXP_DPC_CAP_DL_ACTIVE));

	pci_add_ext_cap_save_buffer(pdev, PCI_EXT_CAP_ID_DPC, sizeof(u16));
	return status;
}
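
/*
 * Disable DPC error containment and its interrupt when the port service
 * is unbound.
 */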
static void dpc_remove(struct pcie_device *dev)
{
	struct pci_dev *pdev = dev->port;
	u16 ctl;

	pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, &ctl);
	ctl &= ~(PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN);
	pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, ctl);
}
static struct pcie_port_service_driver dpcdriver = {
	.name		= "dpc",
	.port_type	= PCIE_ANY_PORT,
	.service	= PCIE_PORT_SERVICE_DPC,
	.probe		= dpc_probe,
	.remove		= dpc_remove,
};
int __init pcie_dpc_init(void)
{
	return pcie_port_service_register(&dpcdriver);
}