From 1acfb9b7ee0b1881bb8e875b6757976e48293ec4 Mon Sep 17 00:00:00 2001 From: Jay Fang Date: Mon, 12 Mar 2018 17:13:32 +0800 Subject: [PATCH 01/10] PCI: Add decoding for 16 GT/s link speed PCIe 4.0 defines the 16.0 GT/s link speed. Links can run at that speed without any Linux changes, but previously their sysfs "max_link_speed" and "current_link_speed" files contained "Unknown speed", not the expected "16.0 GT/s". Add decoding for the new 16 GT/s link speed. Signed-off-by: Jay Fang [bhelgaas: add PCI_EXP_LNKCAP2_SLS_16_0GB] Signed-off-by: Bjorn Helgaas Reviewed-by: Dongdong Liu --- drivers/pci/pci-sysfs.c | 6 ++++++ drivers/pci/probe.c | 2 +- drivers/pci/slot.c | 1 + include/linux/pci.h | 1 + include/uapi/linux/pci_regs.h | 7 +++++-- 5 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index eb6bee8724cc..7dc5be545d18 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -168,6 +168,9 @@ static ssize_t max_link_speed_show(struct device *dev, return -EINVAL; switch (linkcap & PCI_EXP_LNKCAP_SLS) { + case PCI_EXP_LNKCAP_SLS_16_0GB: + speed = "16 GT/s"; + break; case PCI_EXP_LNKCAP_SLS_8_0GB: speed = "8 GT/s"; break; @@ -213,6 +216,9 @@ static ssize_t current_link_speed_show(struct device *dev, return -EINVAL; switch (linkstat & PCI_EXP_LNKSTA_CLS) { + case PCI_EXP_LNKSTA_CLS_16_0GB: + speed = "16 GT/s"; + break; case PCI_EXP_LNKSTA_CLS_8_0GB: speed = "8 GT/s"; break; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index ef5377438a1e..86bf045f3d59 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -592,7 +592,7 @@ const unsigned char pcie_link_speed[] = { PCIE_SPEED_2_5GT, /* 1 */ PCIE_SPEED_5_0GT, /* 2 */ PCIE_SPEED_8_0GT, /* 3 */ - PCI_SPEED_UNKNOWN, /* 4 */ + PCIE_SPEED_16_0GT, /* 4 */ PCI_SPEED_UNKNOWN, /* 5 */ PCI_SPEED_UNKNOWN, /* 6 */ PCI_SPEED_UNKNOWN, /* 7 */ diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index d10f556dc03e..191893e19d5c 100644 --- 
a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -76,6 +76,7 @@ static const char *pci_bus_speed_strings[] = { "2.5 GT/s PCIe", /* 0x14 */ "5.0 GT/s PCIe", /* 0x15 */ "8.0 GT/s PCIe", /* 0x16 */ + "16.0 GT/s PCIe", /* 0x17 */ }; static ssize_t bus_speed_read(enum pci_bus_speed speed, char *buf) diff --git a/include/linux/pci.h b/include/linux/pci.h index 024a1beda008..8043a5937ad0 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -256,6 +256,7 @@ enum pci_bus_speed { PCIE_SPEED_2_5GT = 0x14, PCIE_SPEED_5_0GT = 0x15, PCIE_SPEED_8_0GT = 0x16, + PCIE_SPEED_16_0GT = 0x17, PCI_SPEED_UNKNOWN = 0xff, }; diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 0c79eac5e9b8..103ba797a8f3 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -520,6 +520,7 @@ #define PCI_EXP_LNKCAP_SLS_2_5GB 0x00000001 /* LNKCAP2 SLS Vector bit 0 */ #define PCI_EXP_LNKCAP_SLS_5_0GB 0x00000002 /* LNKCAP2 SLS Vector bit 1 */ #define PCI_EXP_LNKCAP_SLS_8_0GB 0x00000003 /* LNKCAP2 SLS Vector bit 2 */ +#define PCI_EXP_LNKCAP_SLS_16_0GB 0x00000004 /* LNKCAP2 SLS Vector bit 3 */ #define PCI_EXP_LNKCAP_MLW 0x000003f0 /* Maximum Link Width */ #define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */ #define PCI_EXP_LNKCAP_L0SEL 0x00007000 /* L0s Exit Latency */ @@ -547,6 +548,7 @@ #define PCI_EXP_LNKSTA_CLS_2_5GB 0x0001 /* Current Link Speed 2.5GT/s */ #define PCI_EXP_LNKSTA_CLS_5_0GB 0x0002 /* Current Link Speed 5.0GT/s */ #define PCI_EXP_LNKSTA_CLS_8_0GB 0x0003 /* Current Link Speed 8.0GT/s */ +#define PCI_EXP_LNKSTA_CLS_16_0GB 0x0004 /* Current Link Speed 16.0GT/s */ #define PCI_EXP_LNKSTA_NLW 0x03f0 /* Negotiated Link Width */ #define PCI_EXP_LNKSTA_NLW_X1 0x0010 /* Current Link Width x1 */ #define PCI_EXP_LNKSTA_NLW_X2 0x0020 /* Current Link Width x2 */ @@ -648,8 +650,9 @@ #define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2 44 /* v2 endpoints without link end here */ #define PCI_EXP_LNKCAP2 44 /* Link Capabilities 2 */ #define 
PCI_EXP_LNKCAP2_SLS_2_5GB 0x00000002 /* Supported Speed 2.5GT/s */ -#define PCI_EXP_LNKCAP2_SLS_5_0GB 0x00000004 /* Supported Speed 5.0GT/s */ -#define PCI_EXP_LNKCAP2_SLS_8_0GB 0x00000008 /* Supported Speed 8.0GT/s */ +#define PCI_EXP_LNKCAP2_SLS_5_0GB 0x00000004 /* Supported Speed 5GT/s */ +#define PCI_EXP_LNKCAP2_SLS_8_0GB 0x00000008 /* Supported Speed 8GT/s */ +#define PCI_EXP_LNKCAP2_SLS_16_0GB 0x00000010 /* Supported Speed 16GT/s */ #define PCI_EXP_LNKCAP2_CROSSLINK 0x00000100 /* Crosslink supported */ #define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ #define PCI_EXP_LNKSTA2 50 /* Link Status 2 */ From 6cf57be0f78e289aaf236f8bc55c40ea6c422c75 Mon Sep 17 00:00:00 2001 From: Tal Gilboa Date: Fri, 30 Mar 2018 07:44:05 -0500 Subject: [PATCH 02/10] PCI: Add pcie_get_speed_cap() to find max supported link speed Add pcie_get_speed_cap() to find the max link speed supported by a device. Change max_link_speed_show() to use pcie_get_speed_cap(). Signed-off-by: Tal Gilboa [bhelgaas: return speed directly instead of error and *speed, don't export outside drivers/pci] Signed-off-by: Bjorn Helgaas Reviewed-by: Tariq Toukan --- drivers/pci/pci-sysfs.c | 28 ++------------------------ drivers/pci/pci.c | 44 +++++++++++++++++++++++++++++++++++++++++ drivers/pci/pci.h | 10 ++++++++++ 3 files changed, 56 insertions(+), 26 deletions(-) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 7dc5be545d18..c2ea05fbbf1d 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -158,33 +158,9 @@ static DEVICE_ATTR_RO(resource); static ssize_t max_link_speed_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct pci_dev *pci_dev = to_pci_dev(dev); - u32 linkcap; - int err; - const char *speed; + struct pci_dev *pdev = to_pci_dev(dev); - err = pcie_capability_read_dword(pci_dev, PCI_EXP_LNKCAP, &linkcap); - if (err) - return -EINVAL; - - switch (linkcap & PCI_EXP_LNKCAP_SLS) { - case PCI_EXP_LNKCAP_SLS_16_0GB: - speed = "16 GT/s"; - break; 
- case PCI_EXP_LNKCAP_SLS_8_0GB: - speed = "8 GT/s"; - break; - case PCI_EXP_LNKCAP_SLS_5_0GB: - speed = "5 GT/s"; - break; - case PCI_EXP_LNKCAP_SLS_2_5GB: - speed = "2.5 GT/s"; - break; - default: - speed = "Unknown speed"; - } - - return sprintf(buf, "%s\n", speed); + return sprintf(buf, "%s\n", PCIE_SPEED2STR(pcie_get_speed_cap(pdev))); } static DEVICE_ATTR_RO(max_link_speed); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index f6a4dd10d9b0..b29d3436ee9f 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5146,6 +5146,50 @@ int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed, } EXPORT_SYMBOL(pcie_get_minimum_link); +/** + * pcie_get_speed_cap - query for the PCI device's link speed capability + * @dev: PCI device to query + * + * Query the PCI device speed capability. Return the maximum link speed + * supported by the device. + */ +enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev) +{ + u32 lnkcap2, lnkcap; + + /* + * PCIe r4.0 sec 7.5.3.18 recommends using the Supported Link + * Speeds Vector in Link Capabilities 2 when supported, falling + * back to Max Link Speed in Link Capabilities otherwise. 
+ */ + pcie_capability_read_dword(dev, PCI_EXP_LNKCAP2, &lnkcap2); + if (lnkcap2) { /* PCIe r3.0-compliant */ + if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_16_0GB) + return PCIE_SPEED_16_0GT; + else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB) + return PCIE_SPEED_8_0GT; + else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_5_0GB) + return PCIE_SPEED_5_0GT; + else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_2_5GB) + return PCIE_SPEED_2_5GT; + return PCI_SPEED_UNKNOWN; + } + + pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnkcap); + if (lnkcap) { /* Max Link Speed is an encoded field, not a bit vector */ + if ((lnkcap & PCI_EXP_LNKCAP_SLS) == PCI_EXP_LNKCAP_SLS_16_0GB) + return PCIE_SPEED_16_0GT; + else if ((lnkcap & PCI_EXP_LNKCAP_SLS) == PCI_EXP_LNKCAP_SLS_8_0GB) + return PCIE_SPEED_8_0GT; + else if ((lnkcap & PCI_EXP_LNKCAP_SLS) == PCI_EXP_LNKCAP_SLS_5_0GB) + return PCIE_SPEED_5_0GT; + else if ((lnkcap & PCI_EXP_LNKCAP_SLS) == PCI_EXP_LNKCAP_SLS_2_5GB) + return PCIE_SPEED_2_5GT; + } + + return PCI_SPEED_UNKNOWN; +} + /** * pci_select_bars - Make BAR mask from the type of resource * @dev: the PCI device for which BAR mask is made diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index fcd81911b127..1186d8be6055 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -253,6 +253,16 @@ bool pci_bus_clip_resource(struct pci_dev *dev, int idx); void pci_reassigndev_resource_alignment(struct pci_dev *dev); void pci_disable_bridge_window(struct pci_dev *dev); +/* PCIe link information */ +#define PCIE_SPEED2STR(speed) \ + ((speed) == PCIE_SPEED_16_0GT ? "16 GT/s" : \ + (speed) == PCIE_SPEED_8_0GT ? "8 GT/s" : \ + (speed) == PCIE_SPEED_5_0GT ? "5 GT/s" : \ + (speed) == PCIE_SPEED_2_5GT ? 
"2.5 GT/s" : \ + "Unknown speed") + +enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev); + /* Single Root I/O Virtualization */ struct pci_sriov { int pos; /* Capability position */ From c70b65fb7f121da7d01f62588ce6abb4741f513f Mon Sep 17 00:00:00 2001 From: Tal Gilboa Date: Fri, 30 Mar 2018 08:24:36 -0500 Subject: [PATCH 03/10] PCI: Add pcie_get_width_cap() to find max supported link width Add pcie_get_width_cap() to find the max link width supported by a device. Change max_link_width_show() to use pcie_get_width_cap(). Signed-off-by: Tal Gilboa [bhelgaas: return width directly instead of error and *width, don't export outside drivers/pci] Signed-off-by: Bjorn Helgaas Reviewed-by: Tariq Toukan --- drivers/pci/pci-sysfs.c | 10 ++-------- drivers/pci/pci.c | 18 ++++++++++++++++++ drivers/pci/pci.h | 1 + 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index c2ea05fbbf1d..63d0952684fb 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -167,15 +167,9 @@ static DEVICE_ATTR_RO(max_link_speed); static ssize_t max_link_width_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct pci_dev *pci_dev = to_pci_dev(dev); - u32 linkcap; - int err; + struct pci_dev *pdev = to_pci_dev(dev); - err = pcie_capability_read_dword(pci_dev, PCI_EXP_LNKCAP, &linkcap); - if (err) - return -EINVAL; - - return sprintf(buf, "%u\n", (linkcap & PCI_EXP_LNKCAP_MLW) >> 4); + return sprintf(buf, "%u\n", pcie_get_width_cap(pdev)); } static DEVICE_ATTR_RO(max_link_width); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index b29d3436ee9f..43075be79388 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5190,6 +5190,24 @@ enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev) return PCI_SPEED_UNKNOWN; } +/** + * pcie_get_width_cap - query for the PCI device's link width capability + * @dev: PCI device to query + * + * Query the PCI device width capability. 
Return the maximum link width + * supported by the device. + */ +enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev) +{ + u32 lnkcap; + + pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnkcap); + if (lnkcap) + return (lnkcap & PCI_EXP_LNKCAP_MLW) >> 4; + + return PCIE_LNK_WIDTH_UNKNOWN; +} + /** * pci_select_bars - Make BAR mask from the type of resource * @dev: the PCI device for which BAR mask is made diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 1186d8be6055..66738f1050c0 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -262,6 +262,7 @@ void pci_disable_bridge_window(struct pci_dev *dev); "Unknown speed") enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev); +enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev); /* Single Root I/O Virtualization */ struct pci_sriov { From b852f63aa6cee3f4846383377c414ae9c4fbc166 Mon Sep 17 00:00:00 2001 From: Tal Gilboa Date: Fri, 30 Mar 2018 08:32:03 -0500 Subject: [PATCH 04/10] PCI: Add pcie_bandwidth_capable() to compute max supported link bandwidth Add pcie_bandwidth_capable() to compute the max link bandwidth supported by a device, based on the max link speed and width, adjusted by the encoding overhead. The maximum bandwidth of the link is computed as: max_link_width * max_link_speed * (1 - encoding_overhead) 2.5 and 5.0 GT/s links use 8b/10b encoding, which reduces the raw bandwidth available by 20%; 8.0 GT/s and faster links use 128b/130b encoding, which reduces it by about 1.5%. The result is in Mb/s, i.e., megabits/second, of raw bandwidth. 
Signed-off-by: Tal Gilboa [bhelgaas: add 16 GT/s, adjust for pcie_get_speed_cap() and pcie_get_width_cap() signatures, don't export outside drivers/pci] Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 22 ++++++++++++++++++++++ drivers/pci/pci.h | 10 ++++++++++ 2 files changed, 32 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 43075be79388..ff1e72060952 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5208,6 +5208,28 @@ enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev) return PCIE_LNK_WIDTH_UNKNOWN; } +/** + * pcie_bandwidth_capable - calculate a PCI device's link bandwidth capability + * @dev: PCI device + * @speed: storage for link speed + * @width: storage for link width + * + * Calculate a PCI device's link bandwidth by querying for its link speed + * and width, multiplying them, and applying encoding overhead. The result + * is in Mb/s, i.e., megabits/second of raw bandwidth. + */ +u32 pcie_bandwidth_capable(struct pci_dev *dev, enum pci_bus_speed *speed, + enum pcie_link_width *width) +{ + *speed = pcie_get_speed_cap(dev); + *width = pcie_get_width_cap(dev); + + if (*speed == PCI_SPEED_UNKNOWN || *width == PCIE_LNK_WIDTH_UNKNOWN) + return 0; + + return *width * PCIE_SPEED2MBS_ENC(*speed); +} + /** * pci_select_bars - Make BAR mask from the type of resource * @dev: the PCI device for which BAR mask is made diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 66738f1050c0..ce9adec6c66e 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -261,8 +261,18 @@ void pci_disable_bridge_window(struct pci_dev *dev); (speed) == PCIE_SPEED_2_5GT ? "2.5 GT/s" : \ "Unknown speed") +/* PCIe speed to Mb/s reduced by encoding overhead */ +#define PCIE_SPEED2MBS_ENC(speed) \ + ((speed) == PCIE_SPEED_16_0GT ? 16000*128/130 : \ + (speed) == PCIE_SPEED_8_0GT ? 8000*128/130 : \ + (speed) == PCIE_SPEED_5_0GT ? 5000*8/10 : \ + (speed) == PCIE_SPEED_2_5GT ? 
2500*8/10 : \ + 0) + enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev); enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev); +u32 pcie_bandwidth_capable(struct pci_dev *dev, enum pci_bus_speed *speed, + enum pcie_link_width *width); /* Single Root I/O Virtualization */ struct pci_sriov { From 6db79a88c67e4679d9c1e4a3f05c6385e21f6e9a Mon Sep 17 00:00:00 2001 From: Tal Gilboa Date: Fri, 30 Mar 2018 08:37:44 -0500 Subject: [PATCH 05/10] PCI: Add pcie_bandwidth_available() to compute bandwidth available to device Add pcie_bandwidth_available() to compute the bandwidth available to a device. This may be limited by the device itself or by a slower upstream link leading to the device. The available bandwidth at each link along the path is computed as: link_width * link_speed * (1 - encoding_overhead) 2.5 and 5.0 GT/s links use 8b/10b encoding, which reduces the raw bandwidth available by 20%; 8.0 GT/s and faster links use 128b/130b encoding, which reduces it by about 1.5%. The result is in Mb/s, i.e., megabits/second, of raw bandwidth. Also return the device with the slowest link and the speed and width of that link. 
Signed-off-by: Tal Gilboa [bhelgaas: changelog, leave pcie_get_minimum_link() alone for now, return bw directly, use pci_upstream_bridge(), check "next_bw <= bw" to find uppermost limiting device, return speed/width of the limiting device] Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 58 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 3 +++ 2 files changed, 61 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index ff1e72060952..91138cbeb853 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5146,6 +5146,64 @@ int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed, } EXPORT_SYMBOL(pcie_get_minimum_link); +/** + * pcie_bandwidth_available - determine minimum link settings of a PCIe + * device and its bandwidth limitation + * @dev: PCI device to query + * @limiting_dev: storage for device causing the bandwidth limitation + * @speed: storage for speed of limiting device + * @width: storage for width of limiting device + * + * Walk up the PCI device chain and find the point where the minimum + * bandwidth is available. Return the bandwidth available there and (if + * limiting_dev, speed, and width pointers are supplied) information about + * that point. The bandwidth returned is in Mb/s, i.e., megabits/second of + * raw bandwidth. 
+ */ +u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev, + enum pci_bus_speed *speed, + enum pcie_link_width *width) +{ + u16 lnksta; + enum pci_bus_speed next_speed; + enum pcie_link_width next_width; + u32 bw, next_bw; + + if (speed) + *speed = PCI_SPEED_UNKNOWN; + if (width) + *width = PCIE_LNK_WIDTH_UNKNOWN; + + bw = 0; + + while (dev) { + pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta); + + next_speed = pcie_link_speed[lnksta & PCI_EXP_LNKSTA_CLS]; + next_width = (lnksta & PCI_EXP_LNKSTA_NLW) >> + PCI_EXP_LNKSTA_NLW_SHIFT; + + next_bw = next_width * PCIE_SPEED2MBS_ENC(next_speed); + + /* Check if current device limits the total bandwidth */ + if (!bw || next_bw <= bw) { + bw = next_bw; + + if (limiting_dev) + *limiting_dev = dev; + if (speed) + *speed = next_speed; + if (width) + *width = next_width; + } + + dev = pci_upstream_bridge(dev); + } + + return bw; +} +EXPORT_SYMBOL(pcie_bandwidth_available); + /** * pcie_get_speed_cap - query for the PCI device's link speed capability * @dev: PCI device to query diff --git a/include/linux/pci.h b/include/linux/pci.h index 8043a5937ad0..f2bf2b7a66c7 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1083,6 +1083,9 @@ int pcie_get_mps(struct pci_dev *dev); int pcie_set_mps(struct pci_dev *dev, int mps); int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed, enum pcie_link_width *width); +u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev, + enum pci_bus_speed *speed, + enum pcie_link_width *width); void pcie_flr(struct pci_dev *dev); int __pci_reset_function_locked(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); From 9e506a7b51474241f0c900e53e85512780275c05 Mon Sep 17 00:00:00 2001 From: Tal Gilboa Date: Fri, 30 Mar 2018 08:56:47 -0500 Subject: [PATCH 06/10] PCI: Add pcie_print_link_status() to log link speed and whether it's limited Add pcie_print_link_status(). 
This logs the current settings of the link (speed, width, and total available bandwidth). If the device is capable of more bandwidth but is limited by a slower upstream link, we include information about the link that limits the device's performance. The user may be able to move the device to a different slot for better performance. This provides a unified method for all PCI devices to report status and issues, instead of each device reporting in a different way, using different code. Signed-off-by: Tal Gilboa [bhelgaas: changelog, reword log messages, print device capabilities when not limited, print bandwidth in Gb/s] Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 32 ++++++++++++++++++++++++++++++++ include/linux/pci.h | 1 + 2 files changed, 33 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 91138cbeb853..e7a3917ed389 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5288,6 +5288,38 @@ u32 pcie_bandwidth_capable(struct pci_dev *dev, enum pci_bus_speed *speed, return *width * PCIE_SPEED2MBS_ENC(*speed); } +/** + * pcie_print_link_status - Report the PCI device's link speed and width + * @dev: PCI device to query + * + * Report the available bandwidth at the device. If this is less than the + * device is capable of, report the device's maximum possible bandwidth and + * the upstream link that limits its performance to less than that. 
+ */ +void pcie_print_link_status(struct pci_dev *dev) +{ + enum pcie_link_width width, width_cap; + enum pci_bus_speed speed, speed_cap; + struct pci_dev *limiting_dev = NULL; + u32 bw_avail, bw_cap; + + bw_cap = pcie_bandwidth_capable(dev, &speed_cap, &width_cap); + bw_avail = pcie_bandwidth_available(dev, &limiting_dev, &speed, &width); + + if (bw_avail >= bw_cap) + pci_info(dev, "%u.%03u Gb/s available bandwidth (%s x%d link)\n", + bw_cap / 1000, bw_cap % 1000, + PCIE_SPEED2STR(speed_cap), width_cap); + else + pci_info(dev, "%u.%03u Gb/s available bandwidth, limited by %s x%d link at %s (capable of %u.%03u Gb/s with %s x%d link)\n", + bw_avail / 1000, bw_avail % 1000, + PCIE_SPEED2STR(speed), width, + limiting_dev ? pci_name(limiting_dev) : "", + bw_cap / 1000, bw_cap % 1000, + PCIE_SPEED2STR(speed_cap), width_cap); +} +EXPORT_SYMBOL(pcie_print_link_status); + /** * pci_select_bars - Make BAR mask from the type of resource * @dev: the PCI device for which BAR mask is made diff --git a/include/linux/pci.h b/include/linux/pci.h index f2bf2b7a66c7..38f7957121ef 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1086,6 +1086,7 @@ int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed, u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev, enum pci_bus_speed *speed, enum pcie_link_width *width); +void pcie_print_link_status(struct pci_dev *dev); void pcie_flr(struct pci_dev *dev); int __pci_reset_function_locked(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); From 190b509c8de2af7c35b8fdcef4e54163a865d680 Mon Sep 17 00:00:00 2001 From: Tal Gilboa Date: Fri, 30 Mar 2018 09:01:05 -0500 Subject: [PATCH 07/10] net/mlx4_core: Report PCIe link properties with pcie_print_link_status() Use pcie_print_link_status() to report PCIe link speed and possible limitations instead of implementing this in the driver itself. 
Signed-off-by: Tal Gilboa Signed-off-by: Tariq Toukan [bhelgaas: changelog] Signed-off-by: Bjorn Helgaas --- drivers/net/ethernet/mellanox/mlx4/main.c | 81 +---------------------- 1 file changed, 1 insertion(+), 80 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 4d84cab77105..30cacac54e69 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -623,85 +623,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) return 0; } -static int mlx4_get_pcie_dev_link_caps(struct mlx4_dev *dev, - enum pci_bus_speed *speed, - enum pcie_link_width *width) -{ - u32 lnkcap1, lnkcap2; - int err1, err2; - -#define PCIE_MLW_CAP_SHIFT 4 /* start of MLW mask in link capabilities */ - - *speed = PCI_SPEED_UNKNOWN; - *width = PCIE_LNK_WIDTH_UNKNOWN; - - err1 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP, - &lnkcap1); - err2 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP2, - &lnkcap2); - if (!err2 && lnkcap2) { /* PCIe r3.0-compliant */ - if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB) - *speed = PCIE_SPEED_8_0GT; - else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_5_0GB) - *speed = PCIE_SPEED_5_0GT; - else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_2_5GB) - *speed = PCIE_SPEED_2_5GT; - } - if (!err1) { - *width = (lnkcap1 & PCI_EXP_LNKCAP_MLW) >> PCIE_MLW_CAP_SHIFT; - if (!lnkcap2) { /* pre-r3.0 */ - if (lnkcap1 & PCI_EXP_LNKCAP_SLS_5_0GB) - *speed = PCIE_SPEED_5_0GT; - else if (lnkcap1 & PCI_EXP_LNKCAP_SLS_2_5GB) - *speed = PCIE_SPEED_2_5GT; - } - } - - if (*speed == PCI_SPEED_UNKNOWN || *width == PCIE_LNK_WIDTH_UNKNOWN) { - return err1 ? err1 : - err2 ? err2 : -EINVAL; - } - return 0; -} - -static void mlx4_check_pcie_caps(struct mlx4_dev *dev) -{ - enum pcie_link_width width, width_cap; - enum pci_bus_speed speed, speed_cap; - int err; - -#define PCIE_SPEED_STR(speed) \ - (speed == PCIE_SPEED_8_0GT ? 
"8.0GT/s" : \ - speed == PCIE_SPEED_5_0GT ? "5.0GT/s" : \ - speed == PCIE_SPEED_2_5GT ? "2.5GT/s" : \ - "Unknown") - - err = mlx4_get_pcie_dev_link_caps(dev, &speed_cap, &width_cap); - if (err) { - mlx4_warn(dev, - "Unable to determine PCIe device BW capabilities\n"); - return; - } - - err = pcie_get_minimum_link(dev->persist->pdev, &speed, &width); - if (err || speed == PCI_SPEED_UNKNOWN || - width == PCIE_LNK_WIDTH_UNKNOWN) { - mlx4_warn(dev, - "Unable to determine PCI device chain minimum BW\n"); - return; - } - - if (width != width_cap || speed != speed_cap) - mlx4_warn(dev, - "PCIe BW is different than device's capability\n"); - - mlx4_info(dev, "PCIe link speed is %s, device supports %s\n", - PCIE_SPEED_STR(speed), PCIE_SPEED_STR(speed_cap)); - mlx4_info(dev, "PCIe link width is x%d, device supports x%d\n", - width, width_cap); - return; -} - /*The function checks if there are live vf, return the num of them*/ static int mlx4_how_many_lives_vf(struct mlx4_dev *dev) { @@ -3475,7 +3396,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, * express device capabilities are under-satisfied by the bus. */ if (!mlx4_is_slave(dev)) - mlx4_check_pcie_caps(dev); + pcie_print_link_status(dev->persist->pdev); /* In master functions, the communication channel must be initialized * after obtaining its address from fw */ From 00c6bcb0d60e3f05a533d324ca17b21b6af87fea Mon Sep 17 00:00:00 2001 From: Tal Gilboa Date: Fri, 30 Mar 2018 09:01:34 -0500 Subject: [PATCH 08/10] net/mlx5: Report PCIe link properties with pcie_print_link_status() Use pcie_print_link_status() to report PCIe link speed and possible limitations. 
Signed-off-by: Tal Gilboa [bhelgaas: changelog] Signed-off-by: Bjorn Helgaas Reviewed-by: Tariq Toukan --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 2ef641c91c26..622f02d34aae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1043,6 +1043,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, dev_info(&pdev->dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev)); + /* Only PFs hold the relevant PCIe information for this query */ + if (mlx5_core_is_pf(dev)) + pcie_print_link_status(dev->pdev); + /* on load removing any previous indication of internal error, device is * up */ From 33523a361307c097a8a23f3549cb485db5fba6c5 Mon Sep 17 00:00:00 2001 From: Tal Gilboa Date: Fri, 30 Mar 2018 09:01:51 -0500 Subject: [PATCH 09/10] net/mlx5e: Use pcie_bandwidth_available() to compute bandwidth Use the new pcie_bandwidth_available() function to calculate maximum available bandwidth through the PCI chain instead of computing it ourselves with mlx5e_get_pci_bw(). This is used to detect when the device is capable of more bandwidth than is available in the current slot. The driver may adjust compression settings accordingly. Note that pcie_bandwidth_available() accounts for PCIe encoding overhead, so it is more accurate than mlx5e_get_pci_bw() was. 
Signed-off-by: Tal Gilboa [bhelgaas: remove mlx5e_get_pci_bw() wrapper altogether] Signed-off-by: Bjorn Helgaas Reviewed-by: Tariq Toukan --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 32 +------------------ 1 file changed, 1 insertion(+), 31 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 47bab842c5ee..93291ec4a3d1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3864,36 +3864,6 @@ void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, indirection_rqt[i] = i % num_channels; } -static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw) -{ - enum pcie_link_width width; - enum pci_bus_speed speed; - int err = 0; - - err = pcie_get_minimum_link(mdev->pdev, &speed, &width); - if (err) - return err; - - if (speed == PCI_SPEED_UNKNOWN || width == PCIE_LNK_WIDTH_UNKNOWN) - return -EINVAL; - - switch (speed) { - case PCIE_SPEED_2_5GT: - *pci_bw = 2500 * width; - break; - case PCIE_SPEED_5_0GT: - *pci_bw = 5000 * width; - break; - case PCIE_SPEED_8_0GT: - *pci_bw = 8000 * width; - break; - default: - return -EINVAL; - } - - return 0; -} - static bool cqe_compress_heuristic(u32 link_speed, u32 pci_bw) { return (link_speed && pci_bw && @@ -3979,7 +3949,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, params->num_tc = 1; mlx5e_get_max_linkspeed(mdev, &link_speed); - mlx5e_get_pci_bw(mdev, &pci_bw); + pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL); mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n", link_speed, pci_bw); From 170648fda93729f05d0758c76b8cd9170408471b Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 30 Mar 2018 14:22:44 -0500 Subject: [PATCH 10/10] fm10k: Report PCIe link properties with pcie_print_link_status() Previously the driver used pcie_get_minimum_link() to warn when the NIC is in a slot that can't supply as much bandwidth as the 
NIC could use. pcie_get_minimum_link() can be misleading because it finds the slowest link and the narrowest link (which may be different links) without considering the total bandwidth of each link. For a path with a 16 GT/s x1 link and a 2.5 GT/s x16 link, it returns 2.5 GT/s x1, which corresponds to 250 MB/s of bandwidth, not the true available bandwidth of about 1969 MB/s for a 16 GT/s x1 link. Use pcie_print_link_status() to report PCIe link speed and possible limitations instead of implementing this in the driver itself. This finds the slowest link in the path to the device by computing the total bandwidth of each link and compares that with the capabilities of the device. Note that the driver previously used dev_warn() to suggest using a different slot, but pcie_print_link_status() uses dev_info() because if the platform has no faster slot available, the user can't do anything about the warning and may not want to be bothered with it. Signed-off-by: Bjorn Helgaas Acked-by: Jacob Keller --- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 87 +------------------- 1 file changed, 1 insertion(+), 86 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index a434fecfdfeb..aa05fb534942 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -2120,91 +2120,6 @@ static int fm10k_sw_init(struct fm10k_intfc *interface, return 0; } -static void fm10k_slot_warn(struct fm10k_intfc *interface) -{ - enum pcie_link_width width = PCIE_LNK_WIDTH_UNKNOWN; - enum pci_bus_speed speed = PCI_SPEED_UNKNOWN; - struct fm10k_hw *hw = &interface->hw; - int max_gts = 0, expected_gts = 0; - - if (pcie_get_minimum_link(interface->pdev, &speed, &width) || - speed == PCI_SPEED_UNKNOWN || width == PCIE_LNK_WIDTH_UNKNOWN) { - dev_warn(&interface->pdev->dev, - "Unable to determine PCI Express bandwidth.\n"); - return; - } - - switch (speed) { - case PCIE_SPEED_2_5GT: - /* 
8b/10b encoding reduces max throughput by 20% */ - max_gts = 2 * width; - break; - case PCIE_SPEED_5_0GT: - /* 8b/10b encoding reduces max throughput by 20% */ - max_gts = 4 * width; - break; - case PCIE_SPEED_8_0GT: - /* 128b/130b encoding has less than 2% impact on throughput */ - max_gts = 8 * width; - break; - default: - dev_warn(&interface->pdev->dev, - "Unable to determine PCI Express bandwidth.\n"); - return; - } - - dev_info(&interface->pdev->dev, - "PCI Express bandwidth of %dGT/s available\n", - max_gts); - dev_info(&interface->pdev->dev, - "(Speed:%s, Width: x%d, Encoding Loss:%s, Payload:%s)\n", - (speed == PCIE_SPEED_8_0GT ? "8.0GT/s" : - speed == PCIE_SPEED_5_0GT ? "5.0GT/s" : - speed == PCIE_SPEED_2_5GT ? "2.5GT/s" : - "Unknown"), - hw->bus.width, - (speed == PCIE_SPEED_2_5GT ? "20%" : - speed == PCIE_SPEED_5_0GT ? "20%" : - speed == PCIE_SPEED_8_0GT ? "<2%" : - "Unknown"), - (hw->bus.payload == fm10k_bus_payload_128 ? "128B" : - hw->bus.payload == fm10k_bus_payload_256 ? "256B" : - hw->bus.payload == fm10k_bus_payload_512 ? "512B" : - "Unknown")); - - switch (hw->bus_caps.speed) { - case fm10k_bus_speed_2500: - /* 8b/10b encoding reduces max throughput by 20% */ - expected_gts = 2 * hw->bus_caps.width; - break; - case fm10k_bus_speed_5000: - /* 8b/10b encoding reduces max throughput by 20% */ - expected_gts = 4 * hw->bus_caps.width; - break; - case fm10k_bus_speed_8000: - /* 128b/130b encoding has less than 2% impact on throughput */ - expected_gts = 8 * hw->bus_caps.width; - break; - default: - dev_warn(&interface->pdev->dev, - "Unable to determine expected PCI Express bandwidth.\n"); - return; - } - - if (max_gts >= expected_gts) - return; - - dev_warn(&interface->pdev->dev, - "This device requires %dGT/s of bandwidth for optimal performance.\n", - expected_gts); - dev_warn(&interface->pdev->dev, - "A %sslot with x%d lanes is suggested.\n", - (hw->bus_caps.speed == fm10k_bus_speed_2500 ? "2.5GT/s " : - hw->bus_caps.speed == fm10k_bus_speed_5000 ? 
"5.0GT/s " : - hw->bus_caps.speed == fm10k_bus_speed_8000 ? "8.0GT/s " : ""), - hw->bus_caps.width); -} - /** * fm10k_probe - Device Initialization Routine * @pdev: PCI device information struct @@ -2326,7 +2241,7 @@ static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent) mod_timer(&interface->service_timer, (HZ * 2) + jiffies); /* print warning for non-optimal configurations */ - fm10k_slot_warn(interface); + pcie_print_link_status(interface->pdev); /* report MAC address for logging */ dev_info(&pdev->dev, "%pM\n", netdev->dev_addr);