From 0339dc39a521ead3dbcf101acd8c028c61db57dc Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Wed, 23 Aug 2023 23:43:03 +0000 Subject: [PATCH 1/8] cxl/pci: Fix appropriate checking for _OSC while handling CXL RAS registers cxl_pci fails to unmask CXL protocol errors when CXL memory error reporting is not granted native control. Given that CXL memory error reporting uses the event interface and protocol errors use AER, unmask protocol errors based only on the native AER setting. Without this change end user deployments will fail to report protocol errors in the case where native memory error handling is not granted to Linux. Also, return zero instead of an error code to not block the communication with the cxl device when in native memory error reporting mode. Fixes: 248529edc86f ("cxl: add RAS status unmasking for CXL") Cc: Signed-off-by: Smita Koralahalli Reviewed-by: Robert Richter Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230823234305.27333-2-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dan Williams --- drivers/cxl/pci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 1cb1494c28fe..2323169b6e5f 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -541,9 +541,9 @@ static int cxl_pci_ras_unmask(struct pci_dev *pdev) return 0; } - /* BIOS has CXL error control */ - if (!host_bridge->native_cxl_error) - return -ENXIO; + /* BIOS has PCIe AER error control */ + if (!host_bridge->native_aer) + return 0; rc = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &cap); if (rc) From 49f776724e64c27dd861e7ac8da9d42f01d9d172 Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Wed, 23 Aug 2023 23:43:04 +0000 Subject: [PATCH 2/8] PCI/AER: Export pcie_aer_is_native() Export and move the declaration of pcie_aer_is_native() to a common header file to be reused by cxl/pci module. Signed-off-by: Smita Koralahalli Acked-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Robert Richter Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230823234305.27333-3-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dan Williams --- drivers/pci/pcie/aer.c | 1 + drivers/pci/pcie/portdrv.h | 2 -- include/linux/aer.h | 2 ++ 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index e85ff946e8c8..9c8fd69ae5ad 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -229,6 +229,7 @@ int pcie_aer_is_native(struct pci_dev *dev) return pcie_ports_native || host->native_aer; } +EXPORT_SYMBOL_NS_GPL(pcie_aer_is_native, CXL); static int pci_enable_pcie_error_reporting(struct pci_dev *dev) { diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 58a2b1a1cae4..1f3803bde7ee 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -29,10 +29,8 @@ extern bool pcie_ports_dpc_native; #ifdef CONFIG_PCIEAER int pcie_aer_init(void); -int pcie_aer_is_native(struct pci_dev *dev); #else static inline int pcie_aer_init(void) { return 0; } -static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; } #endif #ifdef CONFIG_HOTPLUG_PCI_PCIE diff --git a/include/linux/aer.h b/include/linux/aer.h index 2dd175f5debd..29cc10220952 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -42,11 +42,13 @@ struct aer_capability_regs { #if defined(CONFIG_PCIEAER) int pci_aer_clear_nonfatal_status(struct pci_dev *dev); +int pcie_aer_is_native(struct pci_dev *dev); #else static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev) { return -EINVAL; } +static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; } #endif void cper_print_aer(struct pci_dev *dev, int aer_severity, From 55b8ff06a0c70e9a6a1696c69f52c0240167d23f Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Wed, 23 Aug 2023 23:43:05 +0000 Subject: [PATCH 3/8] cxl/pci: Replace host_bridge->native_aer with pcie_aer_is_native() Use pcie_aer_is_native() to determine the native AER ownership as the usage of host_bride->native_aer does not cover command line override of AER ownership. Signed-off-by: Smita Koralahalli Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Robert Richter Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230823234305.27333-4-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dan Williams --- drivers/cxl/pci.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 2323169b6e5f..44a21ab7add5 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -529,7 +529,6 @@ static int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, static int cxl_pci_ras_unmask(struct pci_dev *pdev) { - struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus); struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); void __iomem *addr; u32 orig_val, val, mask; @@ -542,7 +541,7 @@ static int cxl_pci_ras_unmask(struct pci_dev *pdev) } /* BIOS has PCIe AER error control */ - if (!host_bridge->native_aer) + if (!pcie_aer_is_native(pdev)) return 0; rc = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &cap); From d2f706058826b803f5b9dc3f6d4c213ae0c54eb9 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Sun, 3 Sep 2023 14:42:58 -0700 Subject: [PATCH 4/8] cxl/mbox: Fix CEL logic for poison and security commands The following debug output was observed while testing CXL cxl_core:cxl_walk_cel:721: cxl_mock_mem cxl_mem.0: Opcode 0x4300 unsupported by driver opcode 0x4300 (Get Poison) is supported by the driver and the mock device supports it. The logic should be checking that the opcode is both not poison and not security. Fix the logic to allow poison and security commands. Fixes: ad64f5952ce3 ("cxl/memdev: Only show sanitize sysfs files when supported") Cc: Signed-off-by: Ira Weiny Reviewed-by: Davidlohr Bueso Acked-by: Jonathan Cameron Link: https://lore.kernel.org/r/20230903-cxl-cel-fix-v1-1-e260c9467be3@intel.com [cleanup cxl_walk_cel() to centralized "enabled" checks] Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index ca60bb8114f2..4df4f614f490 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -715,24 +715,25 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel) for (i = 0; i < cel_entries; i++) { u16 opcode = le16_to_cpu(cel_entry[i].opcode); struct cxl_mem_command *cmd = cxl_mem_find_command(opcode); + int enabled = 0; - if (!cmd && (!cxl_is_poison_command(opcode) || - !cxl_is_security_command(opcode))) { - dev_dbg(dev, - "Opcode 0x%04x unsupported by driver\n", opcode); - continue; + if (cmd) { + set_bit(cmd->info.id, mds->enabled_cmds); + enabled++; } - if (cmd) - set_bit(cmd->info.id, mds->enabled_cmds); - - if (cxl_is_poison_command(opcode)) + if (cxl_is_poison_command(opcode)) { cxl_set_poison_cmd_enabled(&mds->poison, opcode); + enabled++; + } - if (cxl_is_security_command(opcode)) + if (cxl_is_security_command(opcode)) { cxl_set_security_cmd_enabled(&mds->security, opcode); + enabled++; + } - dev_dbg(dev, "Opcode 0x%04x enabled\n", opcode); + dev_dbg(dev, "Opcode 0x%04x %s\n", opcode, + enabled ? "enabled" : "unsupported by driver"); } } From 9e4edf1a2196fa4bea6e8201f166785bd066446a Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Tue, 5 Sep 2023 14:10:07 -0700 Subject: [PATCH 5/8] cxl/region: Match auto-discovered region decoders by HPA range Currently, when the region driver attaches a region to a port, it selects the ports next available decoder to program. With the addition of auto-discovered regions, a port decoder has already been programmed so grabbing the next available decoder can be a mismatch when there is more than one region using the port. The failure appears like this with CXL DEBUG enabled: [] cxl_core:alloc_region_ref:754: cxl region0: endpoint9: HPA order violation region0:[mem 0x14780000000-0x1478fffffff flags 0x200] vs [mem 0x880000000-0x185fffffff flags 0x200] [] cxl_core:cxl_port_attach_region:972: cxl region0: endpoint9: failed to allocate region reference When CXL DEBUG is not enabled, there is no failure message. The region just never materializes. Users can suspect this issue if they know their firmware has programmed decoders so that more than one region is using a port. Note that the problem may appear intermittently, ie not on every reboot. Add a matching method for auto-discovered regions that finds a decoder based on an HPA range. The decoder range must exactly match the region resource parameter. Fixes: a32320b71f08 ("cxl/region: Add region autodiscovery") Signed-off-by: Alison Schofield Reviewed-by: Dave Jiang Reviewed-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20230905211007.256385-1-alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index e115ba382e04..b4c6a749406f 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -717,13 +717,35 @@ static int match_free_decoder(struct device *dev, void *data) return 0; } +static int match_auto_decoder(struct device *dev, void *data) +{ + struct cxl_region_params *p = data; + struct cxl_decoder *cxld; + struct range *r; + + if (!is_switch_decoder(dev)) + return 0; + + cxld = to_cxl_decoder(dev); + r = &cxld->hpa_range; + + if (p->res && p->res->start == r->start && p->res->end == r->end) + return 1; + + return 0; +} + static struct cxl_decoder *cxl_region_find_decoder(struct cxl_port *port, struct cxl_region *cxlr) { struct device *dev; int id = 0; - dev = device_find_child(&port->dev, &id, match_free_decoder); + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) + dev = device_find_child(&port->dev, &cxlr->params, + match_auto_decoder); + else + dev = device_find_child(&port->dev, &id, match_free_decoder); if (!dev) return NULL; /* From 18f35dc9314db89e2d215951e5afa3e636b72baf Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Tue, 22 Aug 2023 11:09:28 -0700 Subject: [PATCH 6/8] cxl/region: Refactor granularity select in cxl_port_setup_targets() In cxl_port_setup_targets() the region driver validates the configuration of auto-discovered region decoders, as well as decoders the driver is preparing to program. The existing calculations use the encoded interleave granularity value to create an interleave granularity that properly fans out when routing an x1 interleave to a greater than x1 interleave. That all worked well, until this config came along: Host Bridge: 2 way at 256 granularity Switch Decoder_A: 1 way at 512 Endpoint_X: 2 way at 256 Switch Decoder_B: 1 way at 512 Endpoint_Y: 2 way at 256 When the Host Bridge interleave is greater than 1 and the root decoder interleave is exactly 1, the region driver needs to consider the number of targets in the region when calculating the expected granularity. While examining the existing logic, and trying to cover the case above, a couple of simplifications appeared, hence this proposed refactoring. The first simplification is to apply the logic to the nominal values and use the existing helper function granularity_to_eig() to translate the desired granularity to the encoded form. This means the comment and code regarding setting address bits is discarded. Although that logic is not wrong, it adds a level of complexity that is not required in the granularity selection. The eig and eiw are indeed part of the routing instructions programmed into the decoders. Up-level the discussion to nominal ways and granularity for clearer analysis. The second simplification reduces the logic to a single granularity calculation that works for all cases. The new calculation doesn't care if parent_iw => 1 because parent_iw is used as a multiplier. The refactor cleans up a useless assignment of eiw made after the iw is already calculated. Regression testing included an examination of all of the ways and granularity selections made during a run of the cxl_test unit tests. There were no differences in selections before and after this patch. Fixes: ("27b3f8d13830 cxl/region: Program target lists") Signed-off-by: Alison Schofield Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230822180928.117596-1-alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index b4c6a749406f..6d63b8798c29 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1176,16 +1176,15 @@ static int cxl_port_setup_targets(struct cxl_port *port, } /* - * If @parent_port is masking address bits, pick the next unused address - * bit to route @port's targets. + * Interleave granularity is a multiple of @parent_port granularity. + * Multiplier is the parent port interleave ways. */ - if (parent_iw > 1 && cxl_rr->nr_targets > 1) { - u32 address_bit = max(peig + peiw, eiw + peig); - - eig = address_bit - eiw + 1; - } else { - eiw = peiw; - eig = peig; + rc = granularity_to_eig(parent_ig * parent_iw, &eig); + if (rc) { + dev_dbg(&cxlr->dev, + "%s: invalid granularity calculation (%d * %d)\n", + dev_name(&parent_port->dev), parent_ig, parent_iw); + return rc; } rc = eig_to_granularity(eig, &ig); From a76b62518eb30ef59158fa777ab2e2a23e1334f9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 15 Sep 2023 01:07:30 -0700 Subject: [PATCH 7/8] cxl/port: Fix cxl_test register enumeration regression The cxl_test unit test environment models a CXL topology for sysfs/user-ABI regression testing. It uses interface mocking via the "--wrap=" linker option to redirect cxl_core routines that parse hardware registers with versions that just publish objects, like devm_cxl_enumerate_decoders(). Starting with: Commit 19ab69a60e3b ("cxl/port: Store the port's Component Register mappings in struct cxl_port") ...port register enumeration is moved into devm_cxl_add_port(). This conflicts with the "cxl_test avoids emulating registers stance" so either the port code needs to be refactored (too violent), or modified so that register enumeration is skipped on "fake" cxl_test ports (annoying, but straightforward). This conflict has happened previously and the "check for platform device" workaround to avoid instrusive refactoring was deployed in those scenarios. In general, refactoring should only benefit production code, test code needs to remain minimally instrusive to the greatest extent possible. This was missed previously because it may sometimes just cause warning messages to be emitted, but it can also cause test failures. The backport to -stable is only nice to have for clean cxl_test runs. Fixes: 19ab69a60e3b ("cxl/port: Store the port's Component Register mappings in struct cxl_port") Cc: stable@vger.kernel.org Reported-by: Alison Schofield Reviewed-by: Dave Jiang Tested-by: Dave Jiang Link: https://lore.kernel.org/r/169476525052.1013896.6235102957693675187.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/port.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 724be8448eb4..7ca01a834e18 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2020 Intel Corporation. All rights reserved. */ +#include #include #include #include @@ -706,16 +707,20 @@ static int cxl_setup_comp_regs(struct device *dev, struct cxl_register_map *map, return cxl_setup_regs(map); } -static inline int cxl_port_setup_regs(struct cxl_port *port, - resource_size_t component_reg_phys) +static int cxl_port_setup_regs(struct cxl_port *port, + resource_size_t component_reg_phys) { + if (dev_is_platform(port->uport_dev)) + return 0; return cxl_setup_comp_regs(&port->dev, &port->comp_map, component_reg_phys); } -static inline int cxl_dport_setup_regs(struct cxl_dport *dport, - resource_size_t component_reg_phys) +static int cxl_dport_setup_regs(struct cxl_dport *dport, + resource_size_t component_reg_phys) { + if (dev_is_platform(dport->dport_dev)) + return 0; return cxl_setup_comp_regs(dport->dport_dev, &dport->comp_map, component_reg_phys); } From c66650d29764e228eba40b7a59fdb70fa6567daa Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 22 Sep 2023 10:53:19 -0700 Subject: [PATCH 8/8] cxl/acpi: Annotate struct cxl_cxims_data with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct cxl_cxims_data. Additionally, since the element count member must be set before accessing the annotated flexible array member, move its initialization earlier. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Davidlohr Bueso Cc: Jonathan Cameron Cc: Dave Jiang Cc: Alison Schofield Cc: Vishal Verma Cc: Ira Weiny Cc: Dan Williams Cc: linux-cxl@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Vishal Verma Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230922175319.work.096-kees@kernel.org Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index d1c559879dcc..40d055560e52 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -14,7 +14,7 @@ struct cxl_cxims_data { int nr_maps; - u64 xormaps[]; + u64 xormaps[] __counted_by(nr_maps); }; /* @@ -112,9 +112,9 @@ static int cxl_parse_cxims(union acpi_subtable_headers *header, void *arg, GFP_KERNEL); if (!cximsd) return -ENOMEM; + cximsd->nr_maps = nr_maps; memcpy(cximsd->xormaps, cxims->xormap_list, nr_maps * sizeof(*cximsd->xormaps)); - cximsd->nr_maps = nr_maps; cxlrd->platform_data = cximsd; return 0;