cxl/pci: Add callback to log AER correctable error

Add AER error handler callback to read the RAS capability structure
correctable error (CE) status register for the CXL device. Log the
error as a trace event and clear the error. For CXL devices, the driver
also needs to write back to the status register to clear the
unmasked correctable errors.

See CXL spec rev3.0 8.2.4.16 for RAS capability structure CE Status
Register.

Suggested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/166985287203.2871899.13605149073500556137.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
This commit is contained in:
Dave Jiang 2022-11-30 17:02:25 -07:00 committed by Dan Williams
parent 361187e047
commit 6155ccc9dd

View file

@ -617,10 +617,30 @@ static void cxl_error_resume(struct pci_dev *pdev)
dev->driver ? "successful" : "failed");
}
static void cxl_cor_error_detected(struct pci_dev *pdev)
{
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
struct cxl_memdev *cxlmd = cxlds->cxlmd;
struct device *dev = &cxlmd->dev;
void __iomem *addr;
u32 status;
if (!cxlds->regs.ras)
return;
addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
status = le32_to_cpu(readl(addr));
if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
trace_cxl_aer_correctable_error(dev_name(dev), status);
}
}
static const struct pci_error_handlers cxl_error_handlers = {
.error_detected = cxl_error_detected,
.slot_reset = cxl_slot_reset,
.resume = cxl_error_resume,
.cor_error_detected = cxl_cor_error_detected,
};
static struct pci_driver cxl_pci_driver = {