From 258ea41c926b7b3a16d0d7aa210a1401c4a1601b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Nov 2023 12:06:52 +0100 Subject: [PATCH 001/234] Revert "phy: realtek: usb: Add driver for the Realtek SoC USB 3.0 PHY" This reverts commit adda6e82a7de7d6d478f6c8ef127f0ac51c510a1. The recently added Realtek PHY drivers depend on the new port status notification mechanism which was built on the deprecated USB PHY implementation and devicetree binding. Specifically, using these PHYs would require describing the very same PHY using both the generic "phy" property and the deprecated "usb-phy" property which is clearly wrong. We should not be building new functionality on top of the legacy USB PHY implementation even if it is currently stuck in some kind of transitional limbo. Revert the new Realtek PHY drivers for now so that the port status notification interface can be reverted and replaced. Fixes: adda6e82a7de ("phy: realtek: usb: Add driver for the Realtek SoC USB 3.0 PHY") Cc: stable@vger.kernel.org # 6.6 Cc: Stanley Chang Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231106110654.31090-2-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/phy/realtek/Kconfig | 12 - drivers/phy/realtek/Makefile | 1 - drivers/phy/realtek/phy-rtk-usb3.c | 761 ----------------------------- 3 files changed, 774 deletions(-) delete mode 100644 drivers/phy/realtek/phy-rtk-usb3.c diff --git a/drivers/phy/realtek/Kconfig b/drivers/phy/realtek/Kconfig index 75ac7e7c31ae..745587751070 100644 --- a/drivers/phy/realtek/Kconfig +++ b/drivers/phy/realtek/Kconfig @@ -17,16 +17,4 @@ config PHY_RTK_RTD_USB2PHY DWC3 USB IP. This driver will do the PHY initialization of the parameters. -config PHY_RTK_RTD_USB3PHY - tristate "Realtek RTD USB3 PHY Transceiver Driver" - depends on USB_SUPPORT - select GENERIC_PHY - select USB_PHY - select USB_COMMON - help - Enable this to support Realtek SoC USB3 phy transceiver. - The DHC (digital home center) RTD series SoCs used the Synopsys - DWC3 USB IP. This driver will do the PHY initialization - of the parameters. - endif # ARCH_REALTEK || COMPILE_TEST diff --git a/drivers/phy/realtek/Makefile b/drivers/phy/realtek/Makefile index ed7b47ff8a26..cf5d440841a2 100644 --- a/drivers/phy/realtek/Makefile +++ b/drivers/phy/realtek/Makefile @@ -1,3 +1,2 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_PHY_RTK_RTD_USB2PHY) += phy-rtk-usb2.o -obj-$(CONFIG_PHY_RTK_RTD_USB3PHY) += phy-rtk-usb3.o diff --git a/drivers/phy/realtek/phy-rtk-usb3.c b/drivers/phy/realtek/phy-rtk-usb3.c deleted file mode 100644 index 67446a85e968..000000000000 --- a/drivers/phy/realtek/phy-rtk-usb3.c +++ /dev/null @@ -1,761 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * phy-rtk-usb3.c RTK usb3.0 phy driver - * - * copyright (c) 2023 realtek semiconductor corporation - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define USB_MDIO_CTRL_PHY_BUSY BIT(7) -#define USB_MDIO_CTRL_PHY_WRITE BIT(0) -#define USB_MDIO_CTRL_PHY_ADDR_SHIFT 8 -#define USB_MDIO_CTRL_PHY_DATA_SHIFT 16 - -#define MAX_USB_PHY_DATA_SIZE 0x30 -#define PHY_ADDR_0X09 0x09 -#define PHY_ADDR_0X0B 0x0b -#define PHY_ADDR_0X0D 0x0d -#define PHY_ADDR_0X10 0x10 -#define PHY_ADDR_0X1F 0x1f -#define PHY_ADDR_0X20 0x20 -#define PHY_ADDR_0X21 0x21 -#define PHY_ADDR_0X30 0x30 - -#define REG_0X09_FORCE_CALIBRATION BIT(9) -#define REG_0X0B_RX_OFFSET_RANGE_MASK 0xc -#define REG_0X0D_RX_DEBUG_TEST_EN BIT(6) -#define REG_0X10_DEBUG_MODE_SETTING 0x3c0 -#define REG_0X10_DEBUG_MODE_SETTING_MASK 0x3f8 -#define REG_0X1F_RX_OFFSET_CODE_MASK 0x1e - -#define USB_U3_TX_LFPS_SWING_TRIM_SHIFT 4 -#define USB_U3_TX_LFPS_SWING_TRIM_MASK 0xf -#define AMPLITUDE_CONTROL_COARSE_MASK 0xff -#define AMPLITUDE_CONTROL_FINE_MASK 0xffff -#define AMPLITUDE_CONTROL_COARSE_DEFAULT 0xff -#define AMPLITUDE_CONTROL_FINE_DEFAULT 0xffff - -#define PHY_ADDR_MAP_ARRAY_INDEX(addr) (addr) -#define ARRAY_INDEX_MAP_PHY_ADDR(index) (index) - -struct phy_reg { - void __iomem *reg_mdio_ctl; -}; - -struct phy_data { - u8 addr; - u16 data; -}; - -struct phy_cfg { - int param_size; - struct phy_data param[MAX_USB_PHY_DATA_SIZE]; - - bool check_efuse; - bool do_toggle; - bool do_toggle_once; - bool use_default_parameter; - bool check_rx_front_end_offset; -}; - -struct phy_parameter { - struct phy_reg phy_reg; - - /* Get from efuse */ - u8 efuse_usb_u3_tx_lfps_swing_trim; - - /* Get from dts */ - u32 amplitude_control_coarse; - u32 amplitude_control_fine; -}; - -struct rtk_phy { - struct usb_phy phy; - struct device *dev; - - struct phy_cfg *phy_cfg; - int num_phy; - struct phy_parameter *phy_parameter; - - struct dentry *debug_dir; -}; - -#define PHY_IO_TIMEOUT_USEC (50000) -#define PHY_IO_DELAY_US (100) - -static inline int utmi_wait_register(void __iomem *reg, u32 mask, u32 result) -{ - int ret; - unsigned int val; - - ret = read_poll_timeout(readl, val, ((val & mask) == result), - PHY_IO_DELAY_US, PHY_IO_TIMEOUT_USEC, false, reg); - if (ret) { - pr_err("%s can't program USB phy\n", __func__); - return -ETIMEDOUT; - } - - return 0; -} - -static int rtk_phy3_wait_vbusy(struct phy_reg *phy_reg) -{ - return utmi_wait_register(phy_reg->reg_mdio_ctl, USB_MDIO_CTRL_PHY_BUSY, 0); -} - -static u16 rtk_phy_read(struct phy_reg *phy_reg, char addr) -{ - unsigned int tmp; - u32 value; - - tmp = (addr << USB_MDIO_CTRL_PHY_ADDR_SHIFT); - - writel(tmp, phy_reg->reg_mdio_ctl); - - rtk_phy3_wait_vbusy(phy_reg); - - value = readl(phy_reg->reg_mdio_ctl); - value = value >> USB_MDIO_CTRL_PHY_DATA_SHIFT; - - return (u16)value; -} - -static int rtk_phy_write(struct phy_reg *phy_reg, char addr, u16 data) -{ - unsigned int val; - - val = USB_MDIO_CTRL_PHY_WRITE | - (addr << USB_MDIO_CTRL_PHY_ADDR_SHIFT) | - (data << USB_MDIO_CTRL_PHY_DATA_SHIFT); - - writel(val, phy_reg->reg_mdio_ctl); - - rtk_phy3_wait_vbusy(phy_reg); - - return 0; -} - -static void do_rtk_usb3_phy_toggle(struct rtk_phy *rtk_phy, int index, bool connect) -{ - struct phy_cfg *phy_cfg = rtk_phy->phy_cfg; - struct phy_reg *phy_reg; - struct phy_parameter *phy_parameter; - struct phy_data *phy_data; - u8 addr; - u16 data; - int i; - - phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index]; - phy_reg = &phy_parameter->phy_reg; - - if (!phy_cfg->do_toggle) - return; - - i = PHY_ADDR_MAP_ARRAY_INDEX(PHY_ADDR_0X09); - phy_data = phy_cfg->param + i; - addr = phy_data->addr; - data = phy_data->data; - - if (!addr && !data) { - addr = PHY_ADDR_0X09; - data = rtk_phy_read(phy_reg, addr); - phy_data->addr = addr; - phy_data->data = data; - } - - rtk_phy_write(phy_reg, addr, data & (~REG_0X09_FORCE_CALIBRATION)); - mdelay(1); - rtk_phy_write(phy_reg, addr, data | REG_0X09_FORCE_CALIBRATION); -} - -static int do_rtk_phy_init(struct rtk_phy *rtk_phy, int index) -{ - struct phy_cfg *phy_cfg; - struct phy_reg *phy_reg; - struct phy_parameter *phy_parameter; - int i = 0; - - phy_cfg = rtk_phy->phy_cfg; - phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index]; - phy_reg = &phy_parameter->phy_reg; - - if (phy_cfg->use_default_parameter) - goto do_toggle; - - for (i = 0; i < phy_cfg->param_size; i++) { - struct phy_data *phy_data = phy_cfg->param + i; - u8 addr = phy_data->addr; - u16 data = phy_data->data; - - if (!addr && !data) - continue; - - rtk_phy_write(phy_reg, addr, data); - } - -do_toggle: - if (phy_cfg->do_toggle_once) - phy_cfg->do_toggle = true; - - do_rtk_usb3_phy_toggle(rtk_phy, index, false); - - if (phy_cfg->do_toggle_once) { - u16 check_value = 0; - int count = 10; - u16 value_0x0d, value_0x10; - - /* Enable Debug mode by set 0x0D and 0x10 */ - value_0x0d = rtk_phy_read(phy_reg, PHY_ADDR_0X0D); - value_0x10 = rtk_phy_read(phy_reg, PHY_ADDR_0X10); - - rtk_phy_write(phy_reg, PHY_ADDR_0X0D, - value_0x0d | REG_0X0D_RX_DEBUG_TEST_EN); - rtk_phy_write(phy_reg, PHY_ADDR_0X10, - (value_0x10 & ~REG_0X10_DEBUG_MODE_SETTING_MASK) | - REG_0X10_DEBUG_MODE_SETTING); - - check_value = rtk_phy_read(phy_reg, PHY_ADDR_0X30); - - while (!(check_value & BIT(15))) { - check_value = rtk_phy_read(phy_reg, PHY_ADDR_0X30); - mdelay(1); - if (count-- < 0) - break; - } - - if (!(check_value & BIT(15))) - dev_info(rtk_phy->dev, "toggle fail addr=0x%02x, data=0x%04x\n", - PHY_ADDR_0X30, check_value); - - /* Disable Debug mode by set 0x0D and 0x10 to default*/ - rtk_phy_write(phy_reg, PHY_ADDR_0X0D, value_0x0d); - rtk_phy_write(phy_reg, PHY_ADDR_0X10, value_0x10); - - phy_cfg->do_toggle = false; - } - - if (phy_cfg->check_rx_front_end_offset) { - u16 rx_offset_code, rx_offset_range; - u16 code_mask = REG_0X1F_RX_OFFSET_CODE_MASK; - u16 range_mask = REG_0X0B_RX_OFFSET_RANGE_MASK; - bool do_update = false; - - rx_offset_code = rtk_phy_read(phy_reg, PHY_ADDR_0X1F); - if (((rx_offset_code & code_mask) == 0x0) || - ((rx_offset_code & code_mask) == code_mask)) - do_update = true; - - rx_offset_range = rtk_phy_read(phy_reg, PHY_ADDR_0X0B); - if (((rx_offset_range & range_mask) == range_mask) && do_update) { - dev_warn(rtk_phy->dev, "Don't update rx_offset_range (rx_offset_code=0x%x, rx_offset_range=0x%x)\n", - rx_offset_code, rx_offset_range); - do_update = false; - } - - if (do_update) { - u16 tmp1, tmp2; - - tmp1 = rx_offset_range & (~range_mask); - tmp2 = rx_offset_range & range_mask; - tmp2 += (1 << 2); - rx_offset_range = tmp1 | (tmp2 & range_mask); - rtk_phy_write(phy_reg, PHY_ADDR_0X0B, rx_offset_range); - goto do_toggle; - } - } - - return 0; -} - -static int rtk_phy_init(struct phy *phy) -{ - struct rtk_phy *rtk_phy = phy_get_drvdata(phy); - int ret = 0; - int i; - unsigned long phy_init_time = jiffies; - - for (i = 0; i < rtk_phy->num_phy; i++) - ret = do_rtk_phy_init(rtk_phy, i); - - dev_dbg(rtk_phy->dev, "Initialized RTK USB 3.0 PHY (take %dms)\n", - jiffies_to_msecs(jiffies - phy_init_time)); - - return ret; -} - -static int rtk_phy_exit(struct phy *phy) -{ - return 0; -} - -static const struct phy_ops ops = { - .init = rtk_phy_init, - .exit = rtk_phy_exit, - .owner = THIS_MODULE, -}; - -static void rtk_phy_toggle(struct usb_phy *usb3_phy, bool connect, int port) -{ - int index = port; - struct rtk_phy *rtk_phy = NULL; - - rtk_phy = dev_get_drvdata(usb3_phy->dev); - - if (index > rtk_phy->num_phy) { - dev_err(rtk_phy->dev, "%s: The port=%d is not in usb phy (num_phy=%d)\n", - __func__, index, rtk_phy->num_phy); - return; - } - - do_rtk_usb3_phy_toggle(rtk_phy, index, connect); -} - -static int rtk_phy_notify_port_status(struct usb_phy *x, int port, - u16 portstatus, u16 portchange) -{ - bool connect = false; - - pr_debug("%s port=%d portstatus=0x%x portchange=0x%x\n", - __func__, port, (int)portstatus, (int)portchange); - if (portstatus & USB_PORT_STAT_CONNECTION) - connect = true; - - if (portchange & USB_PORT_STAT_C_CONNECTION) - rtk_phy_toggle(x, connect, port); - - return 0; -} - -#ifdef CONFIG_DEBUG_FS -static struct dentry *create_phy_debug_root(void) -{ - struct dentry *phy_debug_root; - - phy_debug_root = debugfs_lookup("phy", usb_debug_root); - if (!phy_debug_root) - phy_debug_root = debugfs_create_dir("phy", usb_debug_root); - - return phy_debug_root; -} - -static int rtk_usb3_parameter_show(struct seq_file *s, void *unused) -{ - struct rtk_phy *rtk_phy = s->private; - struct phy_cfg *phy_cfg; - int i, index; - - phy_cfg = rtk_phy->phy_cfg; - - seq_puts(s, "Property:\n"); - seq_printf(s, " check_efuse: %s\n", - phy_cfg->check_efuse ? "Enable" : "Disable"); - seq_printf(s, " do_toggle: %s\n", - phy_cfg->do_toggle ? "Enable" : "Disable"); - seq_printf(s, " do_toggle_once: %s\n", - phy_cfg->do_toggle_once ? "Enable" : "Disable"); - seq_printf(s, " use_default_parameter: %s\n", - phy_cfg->use_default_parameter ? "Enable" : "Disable"); - - for (index = 0; index < rtk_phy->num_phy; index++) { - struct phy_reg *phy_reg; - struct phy_parameter *phy_parameter; - - phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index]; - phy_reg = &phy_parameter->phy_reg; - - seq_printf(s, "PHY %d:\n", index); - - for (i = 0; i < phy_cfg->param_size; i++) { - struct phy_data *phy_data = phy_cfg->param + i; - u8 addr = ARRAY_INDEX_MAP_PHY_ADDR(i); - u16 data = phy_data->data; - - if (!phy_data->addr && !data) - seq_printf(s, " addr = 0x%02x, data = none ==> read value = 0x%04x\n", - addr, rtk_phy_read(phy_reg, addr)); - else - seq_printf(s, " addr = 0x%02x, data = 0x%04x ==> read value = 0x%04x\n", - addr, data, rtk_phy_read(phy_reg, addr)); - } - - seq_puts(s, "PHY Property:\n"); - seq_printf(s, " efuse_usb_u3_tx_lfps_swing_trim: 0x%x\n", - (int)phy_parameter->efuse_usb_u3_tx_lfps_swing_trim); - seq_printf(s, " amplitude_control_coarse: 0x%x\n", - (int)phy_parameter->amplitude_control_coarse); - seq_printf(s, " amplitude_control_fine: 0x%x\n", - (int)phy_parameter->amplitude_control_fine); - } - - return 0; -} -DEFINE_SHOW_ATTRIBUTE(rtk_usb3_parameter); - -static inline void create_debug_files(struct rtk_phy *rtk_phy) -{ - struct dentry *phy_debug_root = NULL; - - phy_debug_root = create_phy_debug_root(); - - if (!phy_debug_root) - return; - - rtk_phy->debug_dir = debugfs_create_dir(dev_name(rtk_phy->dev), phy_debug_root); - - debugfs_create_file("parameter", 0444, rtk_phy->debug_dir, rtk_phy, - &rtk_usb3_parameter_fops); - - return; -} - -static inline void remove_debug_files(struct rtk_phy *rtk_phy) -{ - debugfs_remove_recursive(rtk_phy->debug_dir); -} -#else -static inline void create_debug_files(struct rtk_phy *rtk_phy) { } -static inline void remove_debug_files(struct rtk_phy *rtk_phy) { } -#endif /* CONFIG_DEBUG_FS */ - -static int get_phy_data_by_efuse(struct rtk_phy *rtk_phy, - struct phy_parameter *phy_parameter, int index) -{ - struct phy_cfg *phy_cfg = rtk_phy->phy_cfg; - u8 value = 0; - struct nvmem_cell *cell; - - if (!phy_cfg->check_efuse) - goto out; - - cell = nvmem_cell_get(rtk_phy->dev, "usb_u3_tx_lfps_swing_trim"); - if (IS_ERR(cell)) { - dev_dbg(rtk_phy->dev, "%s no usb_u3_tx_lfps_swing_trim: %ld\n", - __func__, PTR_ERR(cell)); - } else { - unsigned char *buf; - size_t buf_size; - - buf = nvmem_cell_read(cell, &buf_size); - if (!IS_ERR(buf)) { - value = buf[0] & USB_U3_TX_LFPS_SWING_TRIM_MASK; - kfree(buf); - } - nvmem_cell_put(cell); - } - - if (value > 0 && value < 0x8) - phy_parameter->efuse_usb_u3_tx_lfps_swing_trim = 0x8; - else - phy_parameter->efuse_usb_u3_tx_lfps_swing_trim = (u8)value; - -out: - return 0; -} - -static void update_amplitude_control_value(struct rtk_phy *rtk_phy, - struct phy_parameter *phy_parameter) -{ - struct phy_cfg *phy_cfg; - struct phy_reg *phy_reg; - - phy_reg = &phy_parameter->phy_reg; - phy_cfg = rtk_phy->phy_cfg; - - if (phy_parameter->amplitude_control_coarse != AMPLITUDE_CONTROL_COARSE_DEFAULT) { - u16 val_mask = AMPLITUDE_CONTROL_COARSE_MASK; - u16 data; - - if (!phy_cfg->param[PHY_ADDR_0X20].addr && !phy_cfg->param[PHY_ADDR_0X20].data) { - phy_cfg->param[PHY_ADDR_0X20].addr = PHY_ADDR_0X20; - data = rtk_phy_read(phy_reg, PHY_ADDR_0X20); - } else { - data = phy_cfg->param[PHY_ADDR_0X20].data; - } - - data &= (~val_mask); - data |= (phy_parameter->amplitude_control_coarse & val_mask); - - phy_cfg->param[PHY_ADDR_0X20].data = data; - } - - if (phy_parameter->efuse_usb_u3_tx_lfps_swing_trim) { - u8 efuse_val = phy_parameter->efuse_usb_u3_tx_lfps_swing_trim; - u16 val_mask = USB_U3_TX_LFPS_SWING_TRIM_MASK; - int val_shift = USB_U3_TX_LFPS_SWING_TRIM_SHIFT; - u16 data; - - if (!phy_cfg->param[PHY_ADDR_0X20].addr && !phy_cfg->param[PHY_ADDR_0X20].data) { - phy_cfg->param[PHY_ADDR_0X20].addr = PHY_ADDR_0X20; - data = rtk_phy_read(phy_reg, PHY_ADDR_0X20); - } else { - data = phy_cfg->param[PHY_ADDR_0X20].data; - } - - data &= ~(val_mask << val_shift); - data |= ((efuse_val & val_mask) << val_shift); - - phy_cfg->param[PHY_ADDR_0X20].data = data; - } - - if (phy_parameter->amplitude_control_fine != AMPLITUDE_CONTROL_FINE_DEFAULT) { - u16 val_mask = AMPLITUDE_CONTROL_FINE_MASK; - - if (!phy_cfg->param[PHY_ADDR_0X21].addr && !phy_cfg->param[PHY_ADDR_0X21].data) - phy_cfg->param[PHY_ADDR_0X21].addr = PHY_ADDR_0X21; - - phy_cfg->param[PHY_ADDR_0X21].data = - phy_parameter->amplitude_control_fine & val_mask; - } -} - -static int parse_phy_data(struct rtk_phy *rtk_phy) -{ - struct device *dev = rtk_phy->dev; - struct phy_parameter *phy_parameter; - int ret = 0; - int index; - - rtk_phy->phy_parameter = devm_kzalloc(dev, sizeof(struct phy_parameter) * - rtk_phy->num_phy, GFP_KERNEL); - if (!rtk_phy->phy_parameter) - return -ENOMEM; - - for (index = 0; index < rtk_phy->num_phy; index++) { - phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index]; - - phy_parameter->phy_reg.reg_mdio_ctl = of_iomap(dev->of_node, 0) + index; - - /* Amplitude control address 0x20 bit 0 to bit 7 */ - if (of_property_read_u32(dev->of_node, "realtek,amplitude-control-coarse-tuning", - &phy_parameter->amplitude_control_coarse)) - phy_parameter->amplitude_control_coarse = AMPLITUDE_CONTROL_COARSE_DEFAULT; - - /* Amplitude control address 0x21 bit 0 to bit 16 */ - if (of_property_read_u32(dev->of_node, "realtek,amplitude-control-fine-tuning", - &phy_parameter->amplitude_control_fine)) - phy_parameter->amplitude_control_fine = AMPLITUDE_CONTROL_FINE_DEFAULT; - - get_phy_data_by_efuse(rtk_phy, phy_parameter, index); - - update_amplitude_control_value(rtk_phy, phy_parameter); - } - - return ret; -} - -static int rtk_usb3phy_probe(struct platform_device *pdev) -{ - struct rtk_phy *rtk_phy; - struct device *dev = &pdev->dev; - struct phy *generic_phy; - struct phy_provider *phy_provider; - const struct phy_cfg *phy_cfg; - int ret; - - phy_cfg = of_device_get_match_data(dev); - if (!phy_cfg) { - dev_err(dev, "phy config are not assigned!\n"); - return -EINVAL; - } - - rtk_phy = devm_kzalloc(dev, sizeof(*rtk_phy), GFP_KERNEL); - if (!rtk_phy) - return -ENOMEM; - - rtk_phy->dev = &pdev->dev; - rtk_phy->phy.dev = rtk_phy->dev; - rtk_phy->phy.label = "rtk-usb3phy"; - rtk_phy->phy.notify_port_status = rtk_phy_notify_port_status; - - rtk_phy->phy_cfg = devm_kzalloc(dev, sizeof(*phy_cfg), GFP_KERNEL); - - memcpy(rtk_phy->phy_cfg, phy_cfg, sizeof(*phy_cfg)); - - rtk_phy->num_phy = 1; - - ret = parse_phy_data(rtk_phy); - if (ret) - goto err; - - platform_set_drvdata(pdev, rtk_phy); - - generic_phy = devm_phy_create(rtk_phy->dev, NULL, &ops); - if (IS_ERR(generic_phy)) - return PTR_ERR(generic_phy); - - phy_set_drvdata(generic_phy, rtk_phy); - - phy_provider = devm_of_phy_provider_register(rtk_phy->dev, of_phy_simple_xlate); - if (IS_ERR(phy_provider)) - return PTR_ERR(phy_provider); - - ret = usb_add_phy_dev(&rtk_phy->phy); - if (ret) - goto err; - - create_debug_files(rtk_phy); - -err: - return ret; -} - -static void rtk_usb3phy_remove(struct platform_device *pdev) -{ - struct rtk_phy *rtk_phy = platform_get_drvdata(pdev); - - remove_debug_files(rtk_phy); - - usb_remove_phy(&rtk_phy->phy); -} - -static const struct phy_cfg rtd1295_phy_cfg = { - .param_size = MAX_USB_PHY_DATA_SIZE, - .param = { [0] = {0x01, 0x4008}, [1] = {0x01, 0xe046}, - [2] = {0x02, 0x6046}, [3] = {0x03, 0x2779}, - [4] = {0x04, 0x72f5}, [5] = {0x05, 0x2ad3}, - [6] = {0x06, 0x000e}, [7] = {0x07, 0x2e00}, - [8] = {0x08, 0x3591}, [9] = {0x09, 0x525c}, - [10] = {0x0a, 0xa600}, [11] = {0x0b, 0xa904}, - [12] = {0x0c, 0xc000}, [13] = {0x0d, 0xef1c}, - [14] = {0x0e, 0x2000}, [15] = {0x0f, 0x0000}, - [16] = {0x10, 0x000c}, [17] = {0x11, 0x4c00}, - [18] = {0x12, 0xfc00}, [19] = {0x13, 0x0c81}, - [20] = {0x14, 0xde01}, [21] = {0x15, 0x0000}, - [22] = {0x16, 0x0000}, [23] = {0x17, 0x0000}, - [24] = {0x18, 0x0000}, [25] = {0x19, 0x4004}, - [26] = {0x1a, 0x1260}, [27] = {0x1b, 0xff00}, - [28] = {0x1c, 0xcb00}, [29] = {0x1d, 0xa03f}, - [30] = {0x1e, 0xc2e0}, [31] = {0x1f, 0x2807}, - [32] = {0x20, 0x947a}, [33] = {0x21, 0x88aa}, - [34] = {0x22, 0x0057}, [35] = {0x23, 0xab66}, - [36] = {0x24, 0x0800}, [37] = {0x25, 0x0000}, - [38] = {0x26, 0x040a}, [39] = {0x27, 0x01d6}, - [40] = {0x28, 0xf8c2}, [41] = {0x29, 0x3080}, - [42] = {0x2a, 0x3082}, [43] = {0x2b, 0x2078}, - [44] = {0x2c, 0xffff}, [45] = {0x2d, 0xffff}, - [46] = {0x2e, 0x0000}, [47] = {0x2f, 0x0040}, }, - .check_efuse = false, - .do_toggle = true, - .do_toggle_once = false, - .use_default_parameter = false, - .check_rx_front_end_offset = false, -}; - -static const struct phy_cfg rtd1619_phy_cfg = { - .param_size = MAX_USB_PHY_DATA_SIZE, - .param = { [8] = {0x08, 0x3591}, - [38] = {0x26, 0x840b}, - [40] = {0x28, 0xf842}, }, - .check_efuse = false, - .do_toggle = true, - .do_toggle_once = false, - .use_default_parameter = false, - .check_rx_front_end_offset = false, -}; - -static const struct phy_cfg rtd1319_phy_cfg = { - .param_size = MAX_USB_PHY_DATA_SIZE, - .param = { [1] = {0x01, 0xac86}, - [6] = {0x06, 0x0003}, - [9] = {0x09, 0x924c}, - [10] = {0x0a, 0xa608}, - [11] = {0x0b, 0xb905}, - [14] = {0x0e, 0x2010}, - [32] = {0x20, 0x705a}, - [33] = {0x21, 0xf645}, - [34] = {0x22, 0x0013}, - [35] = {0x23, 0xcb66}, - [41] = {0x29, 0xff00}, }, - .check_efuse = true, - .do_toggle = true, - .do_toggle_once = false, - .use_default_parameter = false, - .check_rx_front_end_offset = false, -}; - -static const struct phy_cfg rtd1619b_phy_cfg = { - .param_size = MAX_USB_PHY_DATA_SIZE, - .param = { [1] = {0x01, 0xac8c}, - [6] = {0x06, 0x0017}, - [9] = {0x09, 0x724c}, - [10] = {0x0a, 0xb610}, - [11] = {0x0b, 0xb90d}, - [13] = {0x0d, 0xef2a}, - [15] = {0x0f, 0x9050}, - [16] = {0x10, 0x000c}, - [32] = {0x20, 0x70ff}, - [34] = {0x22, 0x0013}, - [35] = {0x23, 0xdb66}, - [38] = {0x26, 0x8609}, - [41] = {0x29, 0xff13}, - [42] = {0x2a, 0x3070}, }, - .check_efuse = true, - .do_toggle = false, - .do_toggle_once = true, - .use_default_parameter = false, - .check_rx_front_end_offset = false, -}; - -static const struct phy_cfg rtd1319d_phy_cfg = { - .param_size = MAX_USB_PHY_DATA_SIZE, - .param = { [1] = {0x01, 0xac89}, - [4] = {0x04, 0xf2f5}, - [6] = {0x06, 0x0017}, - [9] = {0x09, 0x424c}, - [10] = {0x0a, 0x9610}, - [11] = {0x0b, 0x9901}, - [12] = {0x0c, 0xf000}, - [13] = {0x0d, 0xef2a}, - [14] = {0x0e, 0x1000}, - [15] = {0x0f, 0x9050}, - [32] = {0x20, 0x7077}, - [35] = {0x23, 0x0b62}, - [37] = {0x25, 0x10ec}, - [42] = {0x2a, 0x3070}, }, - .check_efuse = true, - .do_toggle = false, - .do_toggle_once = true, - .use_default_parameter = false, - .check_rx_front_end_offset = true, -}; - -static const struct of_device_id usbphy_rtk_dt_match[] = { - { .compatible = "realtek,rtd1295-usb3phy", .data = &rtd1295_phy_cfg }, - { .compatible = "realtek,rtd1319-usb3phy", .data = &rtd1319_phy_cfg }, - { .compatible = "realtek,rtd1319d-usb3phy", .data = &rtd1319d_phy_cfg }, - { .compatible = "realtek,rtd1619-usb3phy", .data = &rtd1619_phy_cfg }, - { .compatible = "realtek,rtd1619b-usb3phy", .data = &rtd1619b_phy_cfg }, - {}, -}; -MODULE_DEVICE_TABLE(of, usbphy_rtk_dt_match); - -static struct platform_driver rtk_usb3phy_driver = { - .probe = rtk_usb3phy_probe, - .remove_new = rtk_usb3phy_remove, - .driver = { - .name = "rtk-usb3phy", - .of_match_table = usbphy_rtk_dt_match, - }, -}; - -module_platform_driver(rtk_usb3phy_driver); - -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform: rtk-usb3phy"); -MODULE_AUTHOR("Stanley Chang "); -MODULE_DESCRIPTION("Realtek usb 3.0 phy driver"); From 7a784bcdd7e54f0599da3b2360e472238412623e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Nov 2023 12:06:53 +0100 Subject: [PATCH 002/234] Revert "phy: realtek: usb: Add driver for the Realtek SoC USB 2.0 PHY" This reverts commit 134e6d25f6bd06071e5aac0a7eefcea6f7713955. The recently added Realtek PHY drivers depend on the new port status notification mechanism which was built on the deprecated USB PHY implementation and devicetree binding. Specifically, using these PHYs would require describing the very same PHY using both the generic "phy" property and the deprecated "usb-phy" property which is clearly wrong. We should not be building new functionality on top of the legacy USB PHY implementation even if it is currently stuck in some kind of transitional limbo. Revert the new Realtek PHY drivers for now so that the port status notification interface can be reverted and replaced. Fixes: 134e6d25f6bd ("phy: realtek: usb: Add driver for the Realtek SoC USB 2.0 PHY") Cc: stable@vger.kernel.org # 6.6 Cc: Stanley Chang Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231106110654.31090-3-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/phy/Kconfig | 1 - drivers/phy/Makefile | 1 - drivers/phy/realtek/Kconfig | 20 - drivers/phy/realtek/Makefile | 2 - drivers/phy/realtek/phy-rtk-usb2.c | 1325 ---------------------------- 5 files changed, 1349 deletions(-) delete mode 100644 drivers/phy/realtek/Kconfig delete mode 100644 drivers/phy/realtek/Makefile delete mode 100644 drivers/phy/realtek/phy-rtk-usb2.c diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig index 787354b849c7..4cef568231bf 100644 --- a/drivers/phy/Kconfig +++ b/drivers/phy/Kconfig @@ -87,7 +87,6 @@ source "drivers/phy/motorola/Kconfig" source "drivers/phy/mscc/Kconfig" source "drivers/phy/qualcomm/Kconfig" source "drivers/phy/ralink/Kconfig" -source "drivers/phy/realtek/Kconfig" source "drivers/phy/renesas/Kconfig" source "drivers/phy/rockchip/Kconfig" source "drivers/phy/samsung/Kconfig" diff --git a/drivers/phy/Makefile b/drivers/phy/Makefile index 868a220ed0f6..fb3dc9de6111 100644 --- a/drivers/phy/Makefile +++ b/drivers/phy/Makefile @@ -26,7 +26,6 @@ obj-y += allwinner/ \ mscc/ \ qualcomm/ \ ralink/ \ - realtek/ \ renesas/ \ rockchip/ \ samsung/ \ diff --git a/drivers/phy/realtek/Kconfig b/drivers/phy/realtek/Kconfig deleted file mode 100644 index 745587751070..000000000000 --- a/drivers/phy/realtek/Kconfig +++ /dev/null @@ -1,20 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Phy drivers for Realtek platforms -# - -if ARCH_REALTEK || COMPILE_TEST - -config PHY_RTK_RTD_USB2PHY - tristate "Realtek RTD USB2 PHY Transceiver Driver" - depends on USB_SUPPORT - select GENERIC_PHY - select USB_PHY - select USB_COMMON - help - Enable this to support Realtek SoC USB2 phy transceiver. - The DHC (digital home center) RTD series SoCs used the Synopsys - DWC3 USB IP. This driver will do the PHY initialization - of the parameters. - -endif # ARCH_REALTEK || COMPILE_TEST diff --git a/drivers/phy/realtek/Makefile b/drivers/phy/realtek/Makefile deleted file mode 100644 index cf5d440841a2..000000000000 --- a/drivers/phy/realtek/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_PHY_RTK_RTD_USB2PHY) += phy-rtk-usb2.o diff --git a/drivers/phy/realtek/phy-rtk-usb2.c b/drivers/phy/realtek/phy-rtk-usb2.c deleted file mode 100644 index 0a6426285c67..000000000000 --- a/drivers/phy/realtek/phy-rtk-usb2.c +++ /dev/null @@ -1,1325 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * phy-rtk-usb2.c RTK usb2.0 PHY driver - * - * Copyright (C) 2023 Realtek Semiconductor Corporation - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* GUSB2PHYACCn register */ -#define PHY_NEW_REG_REQ BIT(25) -#define PHY_VSTS_BUSY BIT(23) -#define PHY_VCTRL_SHIFT 8 -#define PHY_REG_DATA_MASK 0xff - -#define GET_LOW_NIBBLE(addr) ((addr) & 0x0f) -#define GET_HIGH_NIBBLE(addr) (((addr) & 0xf0) >> 4) - -#define EFUS_USB_DC_CAL_RATE 2 -#define EFUS_USB_DC_CAL_MAX 7 - -#define EFUS_USB_DC_DIS_RATE 1 -#define EFUS_USB_DC_DIS_MAX 7 - -#define MAX_PHY_DATA_SIZE 20 -#define OFFEST_PHY_READ 0x20 - -#define MAX_USB_PHY_NUM 4 -#define MAX_USB_PHY_PAGE0_DATA_SIZE 16 -#define MAX_USB_PHY_PAGE1_DATA_SIZE 16 -#define MAX_USB_PHY_PAGE2_DATA_SIZE 8 - -#define SET_PAGE_OFFSET 0xf4 -#define SET_PAGE_0 0x9b -#define SET_PAGE_1 0xbb -#define SET_PAGE_2 0xdb - -#define PAGE_START 0xe0 -#define PAGE0_0XE4 0xe4 -#define PAGE0_0XE6 0xe6 -#define PAGE0_0XE7 0xe7 -#define PAGE1_0XE0 0xe0 -#define PAGE1_0XE2 0xe2 - -#define SENSITIVITY_CTRL (BIT(4) | BIT(5) | BIT(6)) -#define ENABLE_AUTO_SENSITIVITY_CALIBRATION BIT(2) -#define DEFAULT_DC_DRIVING_VALUE (0x8) -#define DEFAULT_DC_DISCONNECTION_VALUE (0x6) -#define HS_CLK_SELECT BIT(6) - -struct phy_reg { - void __iomem *reg_wrap_vstatus; - void __iomem *reg_gusb2phyacc0; - int vstatus_index; -}; - -struct phy_data { - u8 addr; - u8 data; -}; - -struct phy_cfg { - int page0_size; - struct phy_data page0[MAX_USB_PHY_PAGE0_DATA_SIZE]; - int page1_size; - struct phy_data page1[MAX_USB_PHY_PAGE1_DATA_SIZE]; - int page2_size; - struct phy_data page2[MAX_USB_PHY_PAGE2_DATA_SIZE]; - - int num_phy; - - bool check_efuse; - int check_efuse_version; -#define CHECK_EFUSE_V1 1 -#define CHECK_EFUSE_V2 2 - int efuse_dc_driving_rate; - int efuse_dc_disconnect_rate; - int dc_driving_mask; - int dc_disconnect_mask; - bool usb_dc_disconnect_at_page0; - int driving_updated_for_dev_dis; - - bool do_toggle; - bool do_toggle_driving; - bool use_default_parameter; - bool is_double_sensitivity_mode; -}; - -struct phy_parameter { - struct phy_reg phy_reg; - - /* Get from efuse */ - s8 efuse_usb_dc_cal; - s8 efuse_usb_dc_dis; - - /* Get from dts */ - bool inverse_hstx_sync_clock; - u32 driving_level; - s32 driving_level_compensate; - s32 disconnection_compensate; -}; - -struct rtk_phy { - struct usb_phy phy; - struct device *dev; - - struct phy_cfg *phy_cfg; - int num_phy; - struct phy_parameter *phy_parameter; - - struct dentry *debug_dir; -}; - -/* mapping 0xE0 to 0 ... 0xE7 to 7, 0xF0 to 8 ,,, 0xF7 to 15 */ -static inline int page_addr_to_array_index(u8 addr) -{ - return (int)((((addr) - PAGE_START) & 0x7) + - ((((addr) - PAGE_START) & 0x10) >> 1)); -} - -static inline u8 array_index_to_page_addr(int index) -{ - return ((((index) + PAGE_START) & 0x7) + - ((((index) & 0x8) << 1) + PAGE_START)); -} - -#define PHY_IO_TIMEOUT_USEC (50000) -#define PHY_IO_DELAY_US (100) - -static inline int utmi_wait_register(void __iomem *reg, u32 mask, u32 result) -{ - int ret; - unsigned int val; - - ret = read_poll_timeout(readl, val, ((val & mask) == result), - PHY_IO_DELAY_US, PHY_IO_TIMEOUT_USEC, false, reg); - if (ret) { - pr_err("%s can't program USB phy\n", __func__); - return -ETIMEDOUT; - } - - return 0; -} - -static char rtk_phy_read(struct phy_reg *phy_reg, char addr) -{ - void __iomem *reg_gusb2phyacc0 = phy_reg->reg_gusb2phyacc0; - unsigned int val; - int ret = 0; - - addr -= OFFEST_PHY_READ; - - /* polling until VBusy == 0 */ - ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0); - if (ret) - return (char)ret; - - /* VCtrl = low nibble of addr, and set PHY_NEW_REG_REQ */ - val = PHY_NEW_REG_REQ | (GET_LOW_NIBBLE(addr) << PHY_VCTRL_SHIFT); - writel(val, reg_gusb2phyacc0); - ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0); - if (ret) - return (char)ret; - - /* VCtrl = high nibble of addr, and set PHY_NEW_REG_REQ */ - val = PHY_NEW_REG_REQ | (GET_HIGH_NIBBLE(addr) << PHY_VCTRL_SHIFT); - writel(val, reg_gusb2phyacc0); - ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0); - if (ret) - return (char)ret; - - val = readl(reg_gusb2phyacc0); - - return (char)(val & PHY_REG_DATA_MASK); -} - -static int rtk_phy_write(struct phy_reg *phy_reg, char addr, char data) -{ - unsigned int val; - void __iomem *reg_wrap_vstatus = phy_reg->reg_wrap_vstatus; - void __iomem *reg_gusb2phyacc0 = phy_reg->reg_gusb2phyacc0; - int shift_bits = phy_reg->vstatus_index * 8; - int ret = 0; - - /* write data to VStatusOut2 (data output to phy) */ - writel((u32)data << shift_bits, reg_wrap_vstatus); - - ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0); - if (ret) - return ret; - - /* VCtrl = low nibble of addr, set PHY_NEW_REG_REQ */ - val = PHY_NEW_REG_REQ | (GET_LOW_NIBBLE(addr) << PHY_VCTRL_SHIFT); - - writel(val, reg_gusb2phyacc0); - ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0); - if (ret) - return ret; - - /* VCtrl = high nibble of addr, set PHY_NEW_REG_REQ */ - val = PHY_NEW_REG_REQ | (GET_HIGH_NIBBLE(addr) << PHY_VCTRL_SHIFT); - - writel(val, reg_gusb2phyacc0); - ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0); - if (ret) - return ret; - - return 0; -} - -static int rtk_phy_set_page(struct phy_reg *phy_reg, int page) -{ - switch (page) { - case 0: - return rtk_phy_write(phy_reg, SET_PAGE_OFFSET, SET_PAGE_0); - case 1: - return rtk_phy_write(phy_reg, SET_PAGE_OFFSET, SET_PAGE_1); - case 2: - return rtk_phy_write(phy_reg, SET_PAGE_OFFSET, SET_PAGE_2); - default: - pr_err("%s error page=%d\n", __func__, page); - } - - return -EINVAL; -} - -static u8 __updated_dc_disconnect_level_page0_0xe4(struct phy_cfg *phy_cfg, - struct phy_parameter *phy_parameter, u8 data) -{ - u8 ret; - s32 val; - s32 dc_disconnect_mask = phy_cfg->dc_disconnect_mask; - int offset = 4; - - val = (s32)((data >> offset) & dc_disconnect_mask) - + phy_parameter->efuse_usb_dc_dis - + phy_parameter->disconnection_compensate; - - if (val > dc_disconnect_mask) - val = dc_disconnect_mask; - else if (val < 0) - val = 0; - - ret = (data & (~(dc_disconnect_mask << offset))) | - (val & dc_disconnect_mask) << offset; - - return ret; -} - -/* updated disconnect level at page0 */ -static void update_dc_disconnect_level_at_page0(struct rtk_phy *rtk_phy, - struct phy_parameter *phy_parameter, bool update) -{ - struct phy_cfg *phy_cfg; - struct phy_reg *phy_reg; - struct phy_data *phy_data_page; - struct phy_data *phy_data; - u8 addr, data; - int offset = 4; - s32 dc_disconnect_mask; - int i; - - phy_cfg = rtk_phy->phy_cfg; - phy_reg = &phy_parameter->phy_reg; - - /* Set page 0 */ - phy_data_page = phy_cfg->page0; - rtk_phy_set_page(phy_reg, 0); - - i = page_addr_to_array_index(PAGE0_0XE4); - phy_data = phy_data_page + i; - if (!phy_data->addr) { - phy_data->addr = PAGE0_0XE4; - phy_data->data = rtk_phy_read(phy_reg, PAGE0_0XE4); - } - - addr = phy_data->addr; - data = phy_data->data; - dc_disconnect_mask = phy_cfg->dc_disconnect_mask; - - if (update) - data = __updated_dc_disconnect_level_page0_0xe4(phy_cfg, phy_parameter, data); - else - data = (data & ~(dc_disconnect_mask << offset)) | - (DEFAULT_DC_DISCONNECTION_VALUE << offset); - - if (rtk_phy_write(phy_reg, addr, data)) - dev_err(rtk_phy->dev, - "%s: Error to set page1 parameter addr=0x%x value=0x%x\n", - __func__, addr, data); -} - -static u8 __updated_dc_disconnect_level_page1_0xe2(struct phy_cfg *phy_cfg, - struct phy_parameter *phy_parameter, u8 data) -{ - u8 ret; - s32 val; - s32 dc_disconnect_mask = phy_cfg->dc_disconnect_mask; - - if (phy_cfg->check_efuse_version == CHECK_EFUSE_V1) { - val = (s32)(data & dc_disconnect_mask) - + phy_parameter->efuse_usb_dc_dis - + phy_parameter->disconnection_compensate; - } else { /* for CHECK_EFUSE_V2 or no efuse */ - if (phy_parameter->efuse_usb_dc_dis) - val = (s32)(phy_parameter->efuse_usb_dc_dis + - phy_parameter->disconnection_compensate); - else - val = (s32)((data & dc_disconnect_mask) + - phy_parameter->disconnection_compensate); - } - - if (val > dc_disconnect_mask) - val = dc_disconnect_mask; - else if (val < 0) - val = 0; - - ret = (data & (~dc_disconnect_mask)) | (val & dc_disconnect_mask); - - return ret; -} - -/* updated disconnect level at page1 */ -static void update_dc_disconnect_level_at_page1(struct rtk_phy *rtk_phy, - struct phy_parameter *phy_parameter, bool update) -{ - struct phy_cfg *phy_cfg; - struct phy_data *phy_data_page; - struct phy_data *phy_data; - struct phy_reg *phy_reg; - u8 addr, data; - s32 dc_disconnect_mask; - int i; - - phy_cfg = rtk_phy->phy_cfg; - phy_reg = &phy_parameter->phy_reg; - - /* Set page 1 */ - phy_data_page = phy_cfg->page1; - rtk_phy_set_page(phy_reg, 1); - - i = page_addr_to_array_index(PAGE1_0XE2); - phy_data = phy_data_page + i; - if (!phy_data->addr) { - phy_data->addr = PAGE1_0XE2; - phy_data->data = rtk_phy_read(phy_reg, PAGE1_0XE2); - } - - addr = phy_data->addr; - data = phy_data->data; - dc_disconnect_mask = phy_cfg->dc_disconnect_mask; - - if (update) - data = __updated_dc_disconnect_level_page1_0xe2(phy_cfg, phy_parameter, data); - else - data = (data & ~dc_disconnect_mask) | DEFAULT_DC_DISCONNECTION_VALUE; - - if (rtk_phy_write(phy_reg, addr, data)) - dev_err(rtk_phy->dev, - "%s: Error to set page1 parameter addr=0x%x value=0x%x\n", - __func__, addr, data); -} - -static void update_dc_disconnect_level(struct rtk_phy *rtk_phy, - struct phy_parameter *phy_parameter, bool update) -{ - struct phy_cfg *phy_cfg = rtk_phy->phy_cfg; - - if (phy_cfg->usb_dc_disconnect_at_page0) - update_dc_disconnect_level_at_page0(rtk_phy, phy_parameter, update); - else - update_dc_disconnect_level_at_page1(rtk_phy, phy_parameter, update); -} - -static u8 __update_dc_driving_page0_0xe4(struct phy_cfg *phy_cfg, - struct phy_parameter *phy_parameter, u8 data) -{ - s32 driving_level_compensate = phy_parameter->driving_level_compensate; - s32 dc_driving_mask = phy_cfg->dc_driving_mask; - s32 val; - u8 ret; - - if (phy_cfg->check_efuse_version == CHECK_EFUSE_V1) { - val = (s32)(data & dc_driving_mask) + driving_level_compensate - + phy_parameter->efuse_usb_dc_cal; - } else { /* for CHECK_EFUSE_V2 or no efuse */ - if (phy_parameter->efuse_usb_dc_cal) - val = (s32)((phy_parameter->efuse_usb_dc_cal & dc_driving_mask) - + driving_level_compensate); - else - val = (s32)(data & dc_driving_mask); - } - - if (val > dc_driving_mask) - val = dc_driving_mask; - else if (val < 0) - val = 0; - - ret = (data & (~dc_driving_mask)) | (val & dc_driving_mask); - - return ret; -} - -static void update_dc_driving_level(struct rtk_phy *rtk_phy, - struct phy_parameter *phy_parameter) -{ - struct phy_cfg *phy_cfg; - struct phy_reg *phy_reg; - - phy_reg = &phy_parameter->phy_reg; - phy_cfg = rtk_phy->phy_cfg; - if (!phy_cfg->page0[4].addr) { - rtk_phy_set_page(phy_reg, 0); - phy_cfg->page0[4].addr = PAGE0_0XE4; - phy_cfg->page0[4].data = rtk_phy_read(phy_reg, PAGE0_0XE4); - } - - if (phy_parameter->driving_level != DEFAULT_DC_DRIVING_VALUE) { - u32 dc_driving_mask; - u8 driving_level; - u8 data; - - data = phy_cfg->page0[4].data; - dc_driving_mask = phy_cfg->dc_driving_mask; - driving_level = data & dc_driving_mask; - - dev_dbg(rtk_phy->dev, "%s driving_level=%d => dts driving_level=%d\n", - __func__, driving_level, phy_parameter->driving_level); - - phy_cfg->page0[4].data = (data & (~dc_driving_mask)) | - (phy_parameter->driving_level & dc_driving_mask); - } - - phy_cfg->page0[4].data = __update_dc_driving_page0_0xe4(phy_cfg, - phy_parameter, - phy_cfg->page0[4].data); -} - -static void update_hs_clk_select(struct rtk_phy *rtk_phy, - struct phy_parameter *phy_parameter) -{ - struct phy_cfg *phy_cfg; - struct phy_reg *phy_reg; - - phy_cfg = rtk_phy->phy_cfg; - phy_reg = &phy_parameter->phy_reg; - - if (phy_parameter->inverse_hstx_sync_clock) { - if (!phy_cfg->page0[6].addr) { - rtk_phy_set_page(phy_reg, 0); - phy_cfg->page0[6].addr = PAGE0_0XE6; - phy_cfg->page0[6].data = rtk_phy_read(phy_reg, PAGE0_0XE6); - } - - phy_cfg->page0[6].data = phy_cfg->page0[6].data | HS_CLK_SELECT; - } -} - -static void do_rtk_phy_toggle(struct rtk_phy *rtk_phy, - int index, bool connect) -{ - struct phy_parameter *phy_parameter; - struct phy_cfg *phy_cfg; - struct phy_reg *phy_reg; - struct phy_data *phy_data_page; - u8 addr, data; - int i; - - phy_cfg = rtk_phy->phy_cfg; - phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index]; - phy_reg = &phy_parameter->phy_reg; - - if (!phy_cfg->do_toggle) - goto out; - - if (phy_cfg->is_double_sensitivity_mode) - goto do_toggle_driving; - - /* Set page 0 */ - rtk_phy_set_page(phy_reg, 0); - - addr = PAGE0_0XE7; - data = rtk_phy_read(phy_reg, addr); - - if (connect) - rtk_phy_write(phy_reg, addr, data & (~SENSITIVITY_CTRL)); - else - rtk_phy_write(phy_reg, addr, data | (SENSITIVITY_CTRL)); - -do_toggle_driving: - - if (!phy_cfg->do_toggle_driving) - goto do_toggle; - - /* Page 0 addr 0xE4 driving capability */ - - /* Set page 0 */ - phy_data_page = phy_cfg->page0; - rtk_phy_set_page(phy_reg, 0); - - i = page_addr_to_array_index(PAGE0_0XE4); - addr = phy_data_page[i].addr; - data = phy_data_page[i].data; - - if (connect) { - rtk_phy_write(phy_reg, addr, data); - } else { - u8 value; - s32 tmp; - s32 driving_updated = - phy_cfg->driving_updated_for_dev_dis; - s32 dc_driving_mask = phy_cfg->dc_driving_mask; - - tmp = (s32)(data & dc_driving_mask) + driving_updated; - - if (tmp > dc_driving_mask) - tmp = dc_driving_mask; - else if (tmp < 0) - tmp = 0; - - value = (data & (~dc_driving_mask)) | (tmp & dc_driving_mask); - - rtk_phy_write(phy_reg, addr, value); - } - -do_toggle: - /* restore dc disconnect level before toggle */ - update_dc_disconnect_level(rtk_phy, phy_parameter, false); - - /* Set page 1 */ - rtk_phy_set_page(phy_reg, 1); - - addr = PAGE1_0XE0; - data = rtk_phy_read(phy_reg, addr); - - rtk_phy_write(phy_reg, addr, data & - (~ENABLE_AUTO_SENSITIVITY_CALIBRATION)); - mdelay(1); - rtk_phy_write(phy_reg, addr, data | - (ENABLE_AUTO_SENSITIVITY_CALIBRATION)); - - /* update dc disconnect level after toggle */ - update_dc_disconnect_level(rtk_phy, phy_parameter, true); - -out: - return; -} - -static int do_rtk_phy_init(struct rtk_phy *rtk_phy, int index) -{ - struct phy_parameter *phy_parameter; - struct phy_cfg *phy_cfg; - struct phy_data *phy_data_page; - struct phy_reg *phy_reg; - int i; - - phy_cfg = rtk_phy->phy_cfg; - phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index]; - phy_reg = &phy_parameter->phy_reg; - - if (phy_cfg->use_default_parameter) { - dev_dbg(rtk_phy->dev, "%s phy#%d use default parameter\n", - __func__, index); - goto do_toggle; - } - - /* Set page 0 */ - phy_data_page = phy_cfg->page0; - rtk_phy_set_page(phy_reg, 0); - - for (i = 0; i < phy_cfg->page0_size; i++) { - struct phy_data *phy_data = phy_data_page + i; - u8 addr = phy_data->addr; - u8 data = phy_data->data; - - if (!addr) - continue; - - if (rtk_phy_write(phy_reg, addr, data)) { - dev_err(rtk_phy->dev, - "%s: Error to set page0 parameter addr=0x%x value=0x%x\n", - __func__, addr, data); - return -EINVAL; - } - } - - /* Set page 1 */ - phy_data_page = phy_cfg->page1; - rtk_phy_set_page(phy_reg, 1); - - for (i = 0; i < phy_cfg->page1_size; i++) { - struct phy_data *phy_data = phy_data_page + i; - u8 addr = phy_data->addr; - u8 data = phy_data->data; - - if (!addr) - continue; - - if (rtk_phy_write(phy_reg, addr, data)) { - dev_err(rtk_phy->dev, - "%s: Error to set page1 parameter addr=0x%x value=0x%x\n", - __func__, addr, data); - return -EINVAL; - } - } - - if (phy_cfg->page2_size == 0) - goto do_toggle; - - /* Set page 2 */ - phy_data_page = phy_cfg->page2; - rtk_phy_set_page(phy_reg, 2); - - for (i = 0; i < phy_cfg->page2_size; i++) { - struct phy_data *phy_data = phy_data_page + i; - u8 addr = phy_data->addr; - u8 data = phy_data->data; - - if (!addr) - continue; - - if (rtk_phy_write(phy_reg, addr, data)) { - dev_err(rtk_phy->dev, - "%s: Error to set page2 parameter addr=0x%x value=0x%x\n", - __func__, addr, data); - return -EINVAL; - } - } - -do_toggle: - do_rtk_phy_toggle(rtk_phy, index, false); - - return 0; -} - -static int rtk_phy_init(struct phy *phy) -{ - struct rtk_phy *rtk_phy = phy_get_drvdata(phy); - unsigned long phy_init_time = jiffies; - int i, ret = 0; - - if (!rtk_phy) - return -EINVAL; - - for (i = 0; i < rtk_phy->num_phy; i++) - ret = do_rtk_phy_init(rtk_phy, i); - - dev_dbg(rtk_phy->dev, "Initialized RTK USB 2.0 PHY (take %dms)\n", - jiffies_to_msecs(jiffies - phy_init_time)); - return ret; -} - -static int rtk_phy_exit(struct phy *phy) -{ - return 0; -} - -static const struct phy_ops ops = { - .init = rtk_phy_init, - .exit = rtk_phy_exit, - .owner = THIS_MODULE, -}; - -static void rtk_phy_toggle(struct usb_phy *usb2_phy, bool connect, int port) -{ - int index = port; - struct rtk_phy *rtk_phy = NULL; - - rtk_phy = dev_get_drvdata(usb2_phy->dev); - - if (index > rtk_phy->num_phy) { - dev_err(rtk_phy->dev, "%s: The port=%d is not in usb phy (num_phy=%d)\n", - __func__, index, rtk_phy->num_phy); - return; - } - - do_rtk_phy_toggle(rtk_phy, index, connect); -} - -static int rtk_phy_notify_port_status(struct usb_phy *x, int port, - u16 portstatus, u16 portchange) -{ - bool connect = false; - - pr_debug("%s port=%d portstatus=0x%x portchange=0x%x\n", - __func__, port, (int)portstatus, (int)portchange); - if (portstatus & USB_PORT_STAT_CONNECTION) - connect = true; - - if (portchange & USB_PORT_STAT_C_CONNECTION) - rtk_phy_toggle(x, connect, port); - - return 0; -} - -#ifdef CONFIG_DEBUG_FS -static struct dentry *create_phy_debug_root(void) -{ - struct dentry *phy_debug_root; - - phy_debug_root = debugfs_lookup("phy", usb_debug_root); - if (!phy_debug_root) - phy_debug_root = debugfs_create_dir("phy", usb_debug_root); - - return phy_debug_root; -} - -static int rtk_usb2_parameter_show(struct seq_file *s, void *unused) -{ - struct rtk_phy *rtk_phy = s->private; - struct phy_cfg *phy_cfg; - int i, index; - - phy_cfg = rtk_phy->phy_cfg; - - seq_puts(s, "Property:\n"); - seq_printf(s, " check_efuse: %s\n", - phy_cfg->check_efuse ? "Enable" : "Disable"); - seq_printf(s, " check_efuse_version: %d\n", - phy_cfg->check_efuse_version); - seq_printf(s, " efuse_dc_driving_rate: %d\n", - phy_cfg->efuse_dc_driving_rate); - seq_printf(s, " dc_driving_mask: 0x%x\n", - phy_cfg->dc_driving_mask); - seq_printf(s, " efuse_dc_disconnect_rate: %d\n", - phy_cfg->efuse_dc_disconnect_rate); - seq_printf(s, " dc_disconnect_mask: 0x%x\n", - phy_cfg->dc_disconnect_mask); - seq_printf(s, " usb_dc_disconnect_at_page0: %s\n", - phy_cfg->usb_dc_disconnect_at_page0 ? "true" : "false"); - seq_printf(s, " do_toggle: %s\n", - phy_cfg->do_toggle ? "Enable" : "Disable"); - seq_printf(s, " do_toggle_driving: %s\n", - phy_cfg->do_toggle_driving ? "Enable" : "Disable"); - seq_printf(s, " driving_updated_for_dev_dis: 0x%x\n", - phy_cfg->driving_updated_for_dev_dis); - seq_printf(s, " use_default_parameter: %s\n", - phy_cfg->use_default_parameter ? "Enable" : "Disable"); - seq_printf(s, " is_double_sensitivity_mode: %s\n", - phy_cfg->is_double_sensitivity_mode ? "Enable" : "Disable"); - - for (index = 0; index < rtk_phy->num_phy; index++) { - struct phy_parameter *phy_parameter; - struct phy_reg *phy_reg; - struct phy_data *phy_data_page; - - phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index]; - phy_reg = &phy_parameter->phy_reg; - - seq_printf(s, "PHY %d:\n", index); - - seq_puts(s, "Page 0:\n"); - /* Set page 0 */ - phy_data_page = phy_cfg->page0; - rtk_phy_set_page(phy_reg, 0); - - for (i = 0; i < phy_cfg->page0_size; i++) { - struct phy_data *phy_data = phy_data_page + i; - u8 addr = array_index_to_page_addr(i); - u8 data = phy_data->data; - u8 value = rtk_phy_read(phy_reg, addr); - - if (phy_data->addr) - seq_printf(s, " Page 0: addr=0x%x data=0x%02x ==> read value=0x%02x\n", - addr, data, value); - else - seq_printf(s, " Page 0: addr=0x%x data=none ==> read value=0x%02x\n", - addr, value); - } - - seq_puts(s, "Page 1:\n"); - /* Set page 1 */ - phy_data_page = phy_cfg->page1; - rtk_phy_set_page(phy_reg, 1); - - for (i = 0; i < phy_cfg->page1_size; i++) { - struct phy_data *phy_data = phy_data_page + i; - u8 addr = array_index_to_page_addr(i); - u8 data = phy_data->data; - u8 value = rtk_phy_read(phy_reg, addr); - - if (phy_data->addr) - seq_printf(s, " Page 1: addr=0x%x data=0x%02x ==> read value=0x%02x\n", - addr, data, value); - else - seq_printf(s, " Page 1: addr=0x%x data=none ==> read value=0x%02x\n", - addr, value); - } - - if (phy_cfg->page2_size == 0) - goto out; - - seq_puts(s, "Page 2:\n"); - /* Set page 2 */ - phy_data_page = phy_cfg->page2; - rtk_phy_set_page(phy_reg, 2); - - for (i = 0; i < phy_cfg->page2_size; i++) { - struct phy_data *phy_data = phy_data_page + i; - u8 addr = array_index_to_page_addr(i); - u8 data = phy_data->data; - u8 value = rtk_phy_read(phy_reg, addr); - - if (phy_data->addr) - seq_printf(s, " Page 2: addr=0x%x data=0x%02x ==> read value=0x%02x\n", - addr, data, value); - else - seq_printf(s, " Page 2: addr=0x%x data=none ==> read value=0x%02x\n", - addr, value); - } - -out: - seq_puts(s, "PHY Property:\n"); - seq_printf(s, " efuse_usb_dc_cal: %d\n", - (int)phy_parameter->efuse_usb_dc_cal); - seq_printf(s, " efuse_usb_dc_dis: %d\n", - (int)phy_parameter->efuse_usb_dc_dis); - seq_printf(s, " inverse_hstx_sync_clock: %s\n", - phy_parameter->inverse_hstx_sync_clock ? "Enable" : "Disable"); - seq_printf(s, " driving_level: %d\n", - phy_parameter->driving_level); - seq_printf(s, " driving_level_compensate: %d\n", - phy_parameter->driving_level_compensate); - seq_printf(s, " disconnection_compensate: %d\n", - phy_parameter->disconnection_compensate); - } - - return 0; -} -DEFINE_SHOW_ATTRIBUTE(rtk_usb2_parameter); - -static inline void create_debug_files(struct rtk_phy *rtk_phy) -{ - struct dentry *phy_debug_root = NULL; - - phy_debug_root = create_phy_debug_root(); - if (!phy_debug_root) - return; - - rtk_phy->debug_dir = debugfs_create_dir(dev_name(rtk_phy->dev), - phy_debug_root); - - debugfs_create_file("parameter", 0444, rtk_phy->debug_dir, rtk_phy, - &rtk_usb2_parameter_fops); - - return; -} - -static inline void remove_debug_files(struct rtk_phy *rtk_phy) -{ - debugfs_remove_recursive(rtk_phy->debug_dir); -} -#else -static inline void create_debug_files(struct rtk_phy *rtk_phy) { } -static inline void remove_debug_files(struct rtk_phy *rtk_phy) { } -#endif /* CONFIG_DEBUG_FS */ - -static int get_phy_data_by_efuse(struct rtk_phy *rtk_phy, - struct phy_parameter *phy_parameter, int index) -{ - struct phy_cfg *phy_cfg = rtk_phy->phy_cfg; - u8 value = 0; - struct nvmem_cell *cell; - struct soc_device_attribute rtk_soc_groot[] = { - { .family = "Realtek Groot",}, - { /* empty */ } }; - - if (!phy_cfg->check_efuse) - goto out; - - /* Read efuse for usb dc cal */ - cell = nvmem_cell_get(rtk_phy->dev, "usb-dc-cal"); - if (IS_ERR(cell)) { - dev_dbg(rtk_phy->dev, "%s no usb-dc-cal: %ld\n", - __func__, PTR_ERR(cell)); - } else { - unsigned char *buf; - size_t buf_size; - - buf = nvmem_cell_read(cell, &buf_size); - if (!IS_ERR(buf)) { - value = buf[0] & phy_cfg->dc_driving_mask; - kfree(buf); - } - nvmem_cell_put(cell); - } - - if (phy_cfg->check_efuse_version == CHECK_EFUSE_V1) { - int rate = phy_cfg->efuse_dc_driving_rate; - - if (value <= EFUS_USB_DC_CAL_MAX) - phy_parameter->efuse_usb_dc_cal = (int8_t)(value * rate); - else - phy_parameter->efuse_usb_dc_cal = -(int8_t) - ((EFUS_USB_DC_CAL_MAX & value) * rate); - - if (soc_device_match(rtk_soc_groot)) { - dev_dbg(rtk_phy->dev, "For groot IC we need a workaround to adjust efuse_usb_dc_cal\n"); - - /* We don't multiple dc_cal_rate=2 for positive dc cal compensate */ - if (value <= EFUS_USB_DC_CAL_MAX) - phy_parameter->efuse_usb_dc_cal = (int8_t)(value); - - /* We set max dc cal compensate is 0x8 if otp is 0x7 */ - if (value == 0x7) - phy_parameter->efuse_usb_dc_cal = (int8_t)(value + 1); - } - } else { /* for CHECK_EFUSE_V2 */ - phy_parameter->efuse_usb_dc_cal = value & phy_cfg->dc_driving_mask; - } - - /* Read efuse for usb dc disconnect level */ - value = 0; - cell = nvmem_cell_get(rtk_phy->dev, "usb-dc-dis"); - if (IS_ERR(cell)) { - dev_dbg(rtk_phy->dev, "%s no usb-dc-dis: %ld\n", - __func__, PTR_ERR(cell)); - } else { - unsigned char *buf; - size_t buf_size; - - buf = nvmem_cell_read(cell, &buf_size); - if (!IS_ERR(buf)) { - value = buf[0] & phy_cfg->dc_disconnect_mask; - kfree(buf); - } - nvmem_cell_put(cell); - } - - if (phy_cfg->check_efuse_version == CHECK_EFUSE_V1) { - int rate = phy_cfg->efuse_dc_disconnect_rate; - - if (value <= EFUS_USB_DC_DIS_MAX) - phy_parameter->efuse_usb_dc_dis = (int8_t)(value * rate); - else - phy_parameter->efuse_usb_dc_dis = -(int8_t) - ((EFUS_USB_DC_DIS_MAX & value) * rate); - } else { /* for CHECK_EFUSE_V2 */ - phy_parameter->efuse_usb_dc_dis = value & phy_cfg->dc_disconnect_mask; - } - -out: - return 0; -} - -static int parse_phy_data(struct rtk_phy *rtk_phy) -{ - struct device *dev = rtk_phy->dev; - struct device_node *np = dev->of_node; - struct phy_parameter *phy_parameter; - int ret = 0; - int index; - - rtk_phy->phy_parameter = devm_kzalloc(dev, sizeof(struct phy_parameter) * - rtk_phy->num_phy, GFP_KERNEL); - if (!rtk_phy->phy_parameter) - return -ENOMEM; - - for (index = 0; index < rtk_phy->num_phy; index++) { - phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index]; - - phy_parameter->phy_reg.reg_wrap_vstatus = of_iomap(np, 0); - phy_parameter->phy_reg.reg_gusb2phyacc0 = of_iomap(np, 1) + index; - phy_parameter->phy_reg.vstatus_index = index; - - if (of_property_read_bool(np, "realtek,inverse-hstx-sync-clock")) - phy_parameter->inverse_hstx_sync_clock = true; - else - phy_parameter->inverse_hstx_sync_clock = false; - - if (of_property_read_u32_index(np, "realtek,driving-level", - index, &phy_parameter->driving_level)) - phy_parameter->driving_level = DEFAULT_DC_DRIVING_VALUE; - - if (of_property_read_u32_index(np, "realtek,driving-level-compensate", - index, &phy_parameter->driving_level_compensate)) - phy_parameter->driving_level_compensate = 0; - - if (of_property_read_u32_index(np, "realtek,disconnection-compensate", - index, &phy_parameter->disconnection_compensate)) - phy_parameter->disconnection_compensate = 0; - - get_phy_data_by_efuse(rtk_phy, phy_parameter, index); - - update_dc_driving_level(rtk_phy, phy_parameter); - - update_hs_clk_select(rtk_phy, phy_parameter); - } - - return ret; -} - -static int rtk_usb2phy_probe(struct platform_device *pdev) -{ - struct rtk_phy *rtk_phy; - struct device *dev = &pdev->dev; - struct phy *generic_phy; - struct phy_provider *phy_provider; - const struct phy_cfg *phy_cfg; - int ret = 0; - - phy_cfg = of_device_get_match_data(dev); - if (!phy_cfg) { - dev_err(dev, "phy config are not assigned!\n"); - return -EINVAL; - } - - rtk_phy = devm_kzalloc(dev, sizeof(*rtk_phy), GFP_KERNEL); - if (!rtk_phy) - return -ENOMEM; - - rtk_phy->dev = &pdev->dev; - rtk_phy->phy.dev = rtk_phy->dev; - rtk_phy->phy.label = "rtk-usb2phy"; - rtk_phy->phy.notify_port_status = rtk_phy_notify_port_status; - - rtk_phy->phy_cfg = devm_kzalloc(dev, sizeof(*phy_cfg), GFP_KERNEL); - - memcpy(rtk_phy->phy_cfg, phy_cfg, sizeof(*phy_cfg)); - - rtk_phy->num_phy = phy_cfg->num_phy; - - ret = parse_phy_data(rtk_phy); - if (ret) - goto err; - - platform_set_drvdata(pdev, rtk_phy); - - generic_phy = devm_phy_create(rtk_phy->dev, NULL, &ops); - if (IS_ERR(generic_phy)) - return PTR_ERR(generic_phy); - - phy_set_drvdata(generic_phy, rtk_phy); - - phy_provider = devm_of_phy_provider_register(rtk_phy->dev, - of_phy_simple_xlate); - if (IS_ERR(phy_provider)) - return PTR_ERR(phy_provider); - - ret = usb_add_phy_dev(&rtk_phy->phy); - if (ret) - goto err; - - create_debug_files(rtk_phy); - -err: - return ret; -} - -static void rtk_usb2phy_remove(struct platform_device *pdev) -{ - struct rtk_phy *rtk_phy = platform_get_drvdata(pdev); - - remove_debug_files(rtk_phy); - - usb_remove_phy(&rtk_phy->phy); -} - -static const struct phy_cfg rtd1295_phy_cfg = { - .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE, - .page0 = { [0] = {0xe0, 0x90}, - [3] = {0xe3, 0x3a}, - [4] = {0xe4, 0x68}, - [6] = {0xe6, 0x91}, - [13] = {0xf5, 0x81}, - [15] = {0xf7, 0x02}, }, - .page1_size = 8, - .page1 = { /* default parameter */ }, - .page2_size = 0, - .page2 = { /* no parameter */ }, - .num_phy = 1, - .check_efuse = false, - .check_efuse_version = CHECK_EFUSE_V1, - .efuse_dc_driving_rate = 1, - .dc_driving_mask = 0xf, - .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE, - .dc_disconnect_mask = 0xf, - .usb_dc_disconnect_at_page0 = true, - .do_toggle = true, - .do_toggle_driving = false, - .driving_updated_for_dev_dis = 0xf, - .use_default_parameter = false, - .is_double_sensitivity_mode = false, -}; - -static const struct phy_cfg rtd1395_phy_cfg = { - .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE, - .page0 = { [4] = {0xe4, 0xac}, - [13] = {0xf5, 0x00}, - [15] = {0xf7, 0x02}, }, - .page1_size = 8, - .page1 = { /* default parameter */ }, - .page2_size = 0, - .page2 = { /* no parameter */ }, - .num_phy = 1, - .check_efuse = false, - .check_efuse_version = CHECK_EFUSE_V1, - .efuse_dc_driving_rate = 1, - .dc_driving_mask = 0xf, - .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE, - .dc_disconnect_mask = 0xf, - .usb_dc_disconnect_at_page0 = true, - .do_toggle = true, - .do_toggle_driving = false, - .driving_updated_for_dev_dis = 0xf, - .use_default_parameter = false, - .is_double_sensitivity_mode = false, -}; - -static const struct phy_cfg rtd1395_phy_cfg_2port = { - .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE, - .page0 = { [4] = {0xe4, 0xac}, - [13] = {0xf5, 0x00}, - [15] = {0xf7, 0x02}, }, - .page1_size = 8, - .page1 = { /* default parameter */ }, - .page2_size = 0, - .page2 = { /* no parameter */ }, - .num_phy = 2, - .check_efuse = false, - .check_efuse_version = CHECK_EFUSE_V1, - .efuse_dc_driving_rate = 1, - .dc_driving_mask = 0xf, - .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE, - .dc_disconnect_mask = 0xf, - .usb_dc_disconnect_at_page0 = true, - .do_toggle = true, - .do_toggle_driving = false, - .driving_updated_for_dev_dis = 0xf, - .use_default_parameter = false, - .is_double_sensitivity_mode = false, -}; - -static const struct phy_cfg rtd1619_phy_cfg = { - .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE, - .page0 = { [4] = {0xe4, 0x68}, }, - .page1_size = 8, - .page1 = { /* default parameter */ }, - .page2_size = 0, - .page2 = { /* no parameter */ }, - .num_phy = 1, - .check_efuse = true, - .check_efuse_version = CHECK_EFUSE_V1, - .efuse_dc_driving_rate = 1, - .dc_driving_mask = 0xf, - .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE, - .dc_disconnect_mask = 0xf, - .usb_dc_disconnect_at_page0 = true, - .do_toggle = true, - .do_toggle_driving = false, - .driving_updated_for_dev_dis = 0xf, - .use_default_parameter = false, - .is_double_sensitivity_mode = false, -}; - -static const struct phy_cfg rtd1319_phy_cfg = { - .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE, - .page0 = { [0] = {0xe0, 0x18}, - [4] = {0xe4, 0x6a}, - [7] = {0xe7, 0x71}, - [13] = {0xf5, 0x15}, - [15] = {0xf7, 0x32}, }, - .page1_size = 8, - .page1 = { [3] = {0xe3, 0x44}, }, - .page2_size = MAX_USB_PHY_PAGE2_DATA_SIZE, - .page2 = { [0] = {0xe0, 0x01}, }, - .num_phy = 1, - .check_efuse = true, - .check_efuse_version = CHECK_EFUSE_V1, - .efuse_dc_driving_rate = 1, - .dc_driving_mask = 0xf, - .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE, - .dc_disconnect_mask = 0xf, - .usb_dc_disconnect_at_page0 = true, - .do_toggle = true, - .do_toggle_driving = true, - .driving_updated_for_dev_dis = 0xf, - .use_default_parameter = false, - .is_double_sensitivity_mode = true, -}; - -static const struct phy_cfg rtd1312c_phy_cfg = { - .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE, - .page0 = { [0] = {0xe0, 0x14}, - [4] = {0xe4, 0x67}, - [5] = {0xe5, 0x55}, }, - .page1_size = 8, - .page1 = { [3] = {0xe3, 0x23}, - [6] = {0xe6, 0x58}, }, - .page2_size = MAX_USB_PHY_PAGE2_DATA_SIZE, - .page2 = { /* default parameter */ }, - .num_phy = 1, - .check_efuse = true, - .check_efuse_version = CHECK_EFUSE_V1, - .efuse_dc_driving_rate = 1, - .dc_driving_mask = 0xf, - .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE, - .dc_disconnect_mask = 0xf, - .usb_dc_disconnect_at_page0 = true, - .do_toggle = true, - .do_toggle_driving = true, - .driving_updated_for_dev_dis = 0xf, - .use_default_parameter = false, - .is_double_sensitivity_mode = true, -}; - -static const struct phy_cfg rtd1619b_phy_cfg = { - .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE, - .page0 = { [0] = {0xe0, 0xa3}, - [4] = {0xe4, 0x88}, - [5] = {0xe5, 0x4f}, - [6] = {0xe6, 0x02}, }, - .page1_size = 8, - .page1 = { [3] = {0xe3, 0x64}, }, - .page2_size = MAX_USB_PHY_PAGE2_DATA_SIZE, - .page2 = { [7] = {0xe7, 0x45}, }, - .num_phy = 1, - .check_efuse = true, - .check_efuse_version = CHECK_EFUSE_V1, - .efuse_dc_driving_rate = EFUS_USB_DC_CAL_RATE, - .dc_driving_mask = 0x1f, - .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE, - .dc_disconnect_mask = 0xf, - .usb_dc_disconnect_at_page0 = false, - .do_toggle = true, - .do_toggle_driving = true, - .driving_updated_for_dev_dis = 0x8, - .use_default_parameter = false, - .is_double_sensitivity_mode = true, -}; - -static const struct phy_cfg rtd1319d_phy_cfg = { - .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE, - .page0 = { [0] = {0xe0, 0xa3}, - [4] = {0xe4, 0x8e}, - [5] = {0xe5, 0x4f}, - [6] = {0xe6, 0x02}, }, - .page1_size = MAX_USB_PHY_PAGE1_DATA_SIZE, - .page1 = { [14] = {0xf5, 0x1}, }, - .page2_size = MAX_USB_PHY_PAGE2_DATA_SIZE, - .page2 = { [7] = {0xe7, 0x44}, }, - .check_efuse = true, - .num_phy = 1, - .check_efuse_version = CHECK_EFUSE_V1, - .efuse_dc_driving_rate = EFUS_USB_DC_CAL_RATE, - .dc_driving_mask = 0x1f, - .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE, - .dc_disconnect_mask = 0xf, - .usb_dc_disconnect_at_page0 = false, - .do_toggle = true, - .do_toggle_driving = false, - .driving_updated_for_dev_dis = 0x8, - .use_default_parameter = false, - .is_double_sensitivity_mode = true, -}; - -static const struct phy_cfg rtd1315e_phy_cfg = { - .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE, - .page0 = { [0] = {0xe0, 0xa3}, - [4] = {0xe4, 0x8c}, - [5] = {0xe5, 0x4f}, - [6] = {0xe6, 0x02}, }, - .page1_size = MAX_USB_PHY_PAGE1_DATA_SIZE, - .page1 = { [3] = {0xe3, 0x7f}, - [14] = {0xf5, 0x01}, }, - .page2_size = MAX_USB_PHY_PAGE2_DATA_SIZE, - .page2 = { [7] = {0xe7, 0x44}, }, - .num_phy = 1, - .check_efuse = true, - .check_efuse_version = CHECK_EFUSE_V2, - .efuse_dc_driving_rate = EFUS_USB_DC_CAL_RATE, - .dc_driving_mask = 0x1f, - .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE, - .dc_disconnect_mask = 0xf, - .usb_dc_disconnect_at_page0 = false, - .do_toggle = true, - .do_toggle_driving = false, - .driving_updated_for_dev_dis = 0x8, - .use_default_parameter = false, - .is_double_sensitivity_mode = true, -}; - -static const struct of_device_id usbphy_rtk_dt_match[] = { - { .compatible = "realtek,rtd1295-usb2phy", .data = &rtd1295_phy_cfg }, - { .compatible = "realtek,rtd1312c-usb2phy", .data = &rtd1312c_phy_cfg }, - { .compatible = "realtek,rtd1315e-usb2phy", .data = &rtd1315e_phy_cfg }, - { .compatible = "realtek,rtd1319-usb2phy", .data = &rtd1319_phy_cfg }, - { .compatible = "realtek,rtd1319d-usb2phy", .data = &rtd1319d_phy_cfg }, - { .compatible = "realtek,rtd1395-usb2phy", .data = &rtd1395_phy_cfg }, - { .compatible = "realtek,rtd1395-usb2phy-2port", .data = &rtd1395_phy_cfg_2port }, - { .compatible = "realtek,rtd1619-usb2phy", .data = &rtd1619_phy_cfg }, - { .compatible = "realtek,rtd1619b-usb2phy", .data = &rtd1619b_phy_cfg }, - {}, -}; -MODULE_DEVICE_TABLE(of, usbphy_rtk_dt_match); - -static struct platform_driver rtk_usb2phy_driver = { - .probe = rtk_usb2phy_probe, - .remove_new = rtk_usb2phy_remove, - .driver = { - .name = "rtk-usb2phy", - .of_match_table = usbphy_rtk_dt_match, - }, -}; - -module_platform_driver(rtk_usb2phy_driver); - -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform: rtk-usb2phy"); -MODULE_AUTHOR("Stanley Chang "); -MODULE_DESCRIPTION("Realtek usb 2.0 phy driver"); From 1a229d8690a0f8951fc4aa8b76a7efab0d8de342 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Nov 2023 12:06:54 +0100 Subject: [PATCH 003/234] Revert "usb: phy: add usb phy notify port status API" This reverts commit a08799cf17c22375752abfad3b4a2b34b3acb287. The recently added Realtek PHY drivers depend on the new port status notification mechanism which was built on the deprecated USB PHY implementation and devicetree binding. Specifically, using these PHYs would require describing the very same PHY using both the generic "phy" property and the deprecated "usb-phy" property which is clearly wrong. We should not be building new functionality on top of the legacy USB PHY implementation even if it is currently stuck in some kind of transitional limbo. Revert the new notification interface which is broken by design. Fixes: a08799cf17c2 ("usb: phy: add usb phy notify port status API") Cc: stable@vger.kernel.org # 6.6 Cc: Stanley Chang Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231106110654.31090-4-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 23 ----------------------- include/linux/usb/phy.h | 13 ------------- 2 files changed, 36 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index b4584a0cd484..87480a6e6d93 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -622,29 +622,6 @@ static int hub_ext_port_status(struct usb_hub *hub, int port1, int type, ret = 0; } mutex_unlock(&hub->status_mutex); - - /* - * There is no need to lock status_mutex here, because status_mutex - * protects hub->status, and the phy driver only checks the port - * status without changing the status. - */ - if (!ret) { - struct usb_device *hdev = hub->hdev; - - /* - * Only roothub will be notified of port state changes, - * since the USB PHY only cares about changes at the next - * level. - */ - if (is_root_hub(hdev)) { - struct usb_hcd *hcd = bus_to_hcd(hdev->bus); - - if (hcd->usb_phy) - usb_phy_notify_port_status(hcd->usb_phy, - port1 - 1, *status, *change); - } - } - return ret; } diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h index b513749582d7..e4de6bc1f69b 100644 --- a/include/linux/usb/phy.h +++ b/include/linux/usb/phy.h @@ -144,10 +144,6 @@ struct usb_phy { */ int (*set_wakeup)(struct usb_phy *x, bool enabled); - /* notify phy port status change */ - int (*notify_port_status)(struct usb_phy *x, int port, - u16 portstatus, u16 portchange); - /* notify phy connect status change */ int (*notify_connect)(struct usb_phy *x, enum usb_device_speed speed); @@ -320,15 +316,6 @@ usb_phy_set_wakeup(struct usb_phy *x, bool enabled) return 0; } -static inline int -usb_phy_notify_port_status(struct usb_phy *x, int port, u16 portstatus, u16 portchange) -{ - if (x && x->notify_port_status) - return x->notify_port_status(x, port, portstatus, portchange); - else - return 0; -} - static inline int usb_phy_notify_connect(struct usb_phy *x, enum usb_device_speed speed) { From c3803203bc5ec910a3eb06172cf6fb368e0e4390 Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Mon, 16 Oct 2023 19:01:22 +0530 Subject: [PATCH 004/234] hv/hv_kvp_daemon: Some small fixes for handling NM keyfiles Some small fixes: - lets make sure we are not adding ipv4 addresses in ipv6 section in keyfile and vice versa. - ADDR_FAMILY_IPV6 is a bit in addr_family. Test that bit instead of checking the whole value of addr_family. - Some trivial fixes in hv_set_ifconfig.sh. These fixes are proposed after doing some internal testing at Red Hat. CC: Shradha Gupta CC: Saurabh Sengar Fixes: 42999c904612 ("hv/hv_kvp_daemon:Support for keyfile based connection profile") Signed-off-by: Ani Sinha Reviewed-by: Shradha Gupta Signed-off-by: Wei Liu Message-ID: <20231016133122.2419537-1-anisinha@redhat.com> --- tools/hv/hv_kvp_daemon.c | 20 ++++++++++++-------- tools/hv/hv_set_ifconfig.sh | 4 ++-- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 264eeb9c46a9..318e2dad27e0 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -1421,7 +1421,7 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) if (error) goto setval_error; - if (new_val->addr_family == ADDR_FAMILY_IPV6) { + if (new_val->addr_family & ADDR_FAMILY_IPV6) { error = fprintf(nmfile, "\n[ipv6]\n"); if (error < 0) goto setval_error; @@ -1455,14 +1455,18 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) if (error < 0) goto setval_error; - error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way); - if (error < 0) - goto setval_error; - - error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr); - if (error < 0) - goto setval_error; + /* we do not want ipv4 addresses in ipv6 section and vice versa */ + if (is_ipv6 != is_ipv4((char *)new_val->gate_way)) { + error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way); + if (error < 0) + goto setval_error; + } + if (is_ipv6 != is_ipv4((char *)new_val->dns_addr)) { + error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr); + if (error < 0) + goto setval_error; + } fclose(nmfile); fclose(ifcfg_file); diff --git a/tools/hv/hv_set_ifconfig.sh b/tools/hv/hv_set_ifconfig.sh index ae5a7a8249a2..440a91b35823 100755 --- a/tools/hv/hv_set_ifconfig.sh +++ b/tools/hv/hv_set_ifconfig.sh @@ -53,7 +53,7 @@ # or "manual" if no boot-time protocol should be used) # # address1=ipaddr1/plen -# address=ipaddr2/plen +# address2=ipaddr2/plen # # gateway=gateway1;gateway2 # @@ -61,7 +61,7 @@ # # [ipv6] # address1=ipaddr1/plen -# address2=ipaddr1/plen +# address2=ipaddr2/plen # # gateway=gateway1;gateway2 # From 7e8037b099c0bbe8f2109dc452dbcab8d400fc53 Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Sat, 11 Nov 2023 00:37:47 -0800 Subject: [PATCH 005/234] x86/hyperv: Fix the detection of E820_TYPE_PRAM in a Gen2 VM A Gen2 VM doesn't support legacy PCI/PCIe, so both raw_pci_ops and raw_pci_ext_ops are NULL, and pci_subsys_init() -> pcibios_init() doesn't call pcibios_resource_survey() -> e820__reserve_resources_late(); as a result, any emulated persistent memory of E820_TYPE_PRAM (12) via the kernel parameter memmap=nn[KMG]!ss is not added into iomem_resource and hence can't be detected by register_e820_pmem(). Fix this by directly calling e820__reserve_resources_late() in hv_pci_init(), which is called from arch_initcall(pci_arch_init). It's ok to move a Gen2 VM's e820__reserve_resources_late() from subsys_initcall(pci_subsys_init) to arch_initcall(pci_arch_init) because the code in-between doesn't depend on the E820 resources. e820__reserve_resources_late() depends on e820__reserve_resources(), which has been called earlier from setup_arch(). For a Gen-2 VM, the new hv_pci_init() also adds any memory of E820_TYPE_PMEM (7) into iomem_resource, and acpi_nfit_register_region() -> acpi_nfit_insert_resource() -> region_intersects() returns REGION_INTERSECTS, so the memory of E820_TYPE_PMEM won't get added twice. Changed the local variable "int gen2vm" to "bool gen2vm". Signed-off-by: Saurabh Sengar Signed-off-by: Dexuan Cui Signed-off-by: Wei Liu Message-ID: <1699691867-9827-1-git-send-email-ssengar@linux.microsoft.com> --- arch/x86/hyperv/hv_init.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 21556ad87f4b..8f3a4d16bb79 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -286,15 +287,31 @@ static int hv_cpu_die(unsigned int cpu) static int __init hv_pci_init(void) { - int gen2vm = efi_enabled(EFI_BOOT); + bool gen2vm = efi_enabled(EFI_BOOT); /* - * For Generation-2 VM, we exit from pci_arch_init() by returning 0. - * The purpose is to suppress the harmless warning: + * A Generation-2 VM doesn't support legacy PCI/PCIe, so both + * raw_pci_ops and raw_pci_ext_ops are NULL, and pci_subsys_init() -> + * pcibios_init() doesn't call pcibios_resource_survey() -> + * e820__reserve_resources_late(); as a result, any emulated persistent + * memory of E820_TYPE_PRAM (12) via the kernel parameter + * memmap=nn[KMG]!ss is not added into iomem_resource and hence can't be + * detected by register_e820_pmem(). Fix this by directly calling + * e820__reserve_resources_late() here: e820__reserve_resources_late() + * depends on e820__reserve_resources(), which has been called earlier + * from setup_arch(). Note: e820__reserve_resources_late() also adds + * any memory of E820_TYPE_PMEM (7) into iomem_resource, and + * acpi_nfit_register_region() -> acpi_nfit_insert_resource() -> + * region_intersects() returns REGION_INTERSECTS, so the memory of + * E820_TYPE_PMEM won't get added twice. + * + * We return 0 here so that pci_arch_init() won't print the warning: * "PCI: Fatal: No config space access function found" */ - if (gen2vm) + if (gen2vm) { + e820__reserve_resources_late(); return 0; + } /* For Generation-1 VM, we'll proceed in pci_arch_init(). */ return 1; From 9fadd4509966e375952f31ae954ab5eae76f90fe Mon Sep 17 00:00:00 2001 From: Jithu Joseph Date: Thu, 2 Nov 2023 12:52:18 -0700 Subject: [PATCH 006/234] MAINTAINERS: Remove stale entry for SBL platform driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Maurice is no longer with Intel and his e-mail address is no longer active. Remove the stale entry from Slim boot loader section. Signed-off-by: Jithu Joseph Link: https://lore.kernel.org/r/20231102195218.143440-1-jithu.joseph@intel.com Signed-off-by: Ilpo Järvinen --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 97f51d5ec1cf..0666ddb65552 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11025,7 +11025,6 @@ F: drivers/net/wireless/intel/iwlwifi/ INTEL WMI SLIM BOOTLOADER (SBL) FIRMWARE UPDATE DRIVER M: Jithu Joseph -R: Maurice Ma S: Maintained W: https://slimbootloader.github.io/security/firmware-update.html F: drivers/platform/x86/intel/wmi/sbl-fw-update.c From 7a3c36eef9a5d13b16aa954da54224c9c6bed339 Mon Sep 17 00:00:00 2001 From: Stuart Hayhurst Date: Tue, 14 Nov 2023 11:38:08 +0000 Subject: [PATCH 007/234] platform/x86: ideapad-laptop: Set max_brightness before using it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit max_brightness is used in ideapad_kbd_bl_brightness_get() before it's set, causing ideapad_kbd_bl_brightness_get() to return -EINVAL sometimes. Fixes: ecaa1867b524 ("platform/x86: ideapad-laptop: Add support for keyboard backlights using KBLC ACPI symbol") Signed-off-by: Stuart Hayhurst Cc: stable@vger.kernel.org Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20231114114055.6220-2-stuart.a.hayhurst@gmail.com Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/ideapad-laptop.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index ac037540acfc..88eefccb6ed2 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1425,18 +1425,17 @@ static int ideapad_kbd_bl_init(struct ideapad_private *priv) if (WARN_ON(priv->kbd_bl.initialized)) return -EEXIST; - brightness = ideapad_kbd_bl_brightness_get(priv); - if (brightness < 0) - return brightness; - - priv->kbd_bl.last_brightness = brightness; - if (ideapad_kbd_bl_check_tristate(priv->kbd_bl.type)) { priv->kbd_bl.led.max_brightness = 2; } else { priv->kbd_bl.led.max_brightness = 1; } + brightness = ideapad_kbd_bl_brightness_get(priv); + if (brightness < 0) + return brightness; + + priv->kbd_bl.last_brightness = brightness; priv->kbd_bl.led.name = "platform::" LED_FUNCTION_KBD_BACKLIGHT; priv->kbd_bl.led.brightness_get = ideapad_kbd_bl_led_cdev_brightness_get; priv->kbd_bl.led.brightness_set_blocking = ideapad_kbd_bl_led_cdev_brightness_set; From c5dbf04160005e07e8ca7232a7faa77ab1547ae0 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Mon, 13 Nov 2023 12:07:37 -0800 Subject: [PATCH 008/234] platform/x86: hp-bioscfg: Simplify return check in hp_add_other_attributes() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All cases in switch-case have a same goto on error, move the return check out of the switch. This is a cleanup. Signed-off-by: Harshit Mogalapalli Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20231113200742.3593548-1-harshit.m.mogalapalli@oracle.com Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/hp/hp-bioscfg/bioscfg.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c index 5798b49ddaba..10676e1abc28 100644 --- a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c +++ b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c @@ -630,21 +630,19 @@ static int hp_add_other_attributes(int attr_type) switch (attr_type) { case HPWMI_SECURE_PLATFORM_TYPE: ret = hp_populate_secure_platform_data(attr_name_kobj); - if (ret) - goto err_other_attr_init; break; case HPWMI_SURE_START_TYPE: ret = hp_populate_sure_start_data(attr_name_kobj); - if (ret) - goto err_other_attr_init; break; default: ret = -EINVAL; - goto err_other_attr_init; } + if (ret) + goto err_other_attr_init; + mutex_unlock(&bioscfg_drv.mutex); return 0; From 5736aa9537c9b8927dec32d3d47c8c31fe560f62 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Mon, 13 Nov 2023 12:07:38 -0800 Subject: [PATCH 009/234] platform/x86: hp-bioscfg: move mutex_lock() down in hp_add_other_attributes() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit attr_name_kobj's memory allocation is done with mutex_lock() held, this is not needed. Move allocation outside of mutex_lock() so unlock is not needed when allocation fails. Suggested-by: Ilpo Järvinen Signed-off-by: Harshit Mogalapalli Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20231113200742.3593548-2-harshit.m.mogalapalli@oracle.com Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/hp/hp-bioscfg/bioscfg.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c index 10676e1abc28..a3599498c4e8 100644 --- a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c +++ b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c @@ -592,13 +592,11 @@ static int hp_add_other_attributes(int attr_type) int ret; char *attr_name; - mutex_lock(&bioscfg_drv.mutex); - attr_name_kobj = kzalloc(sizeof(*attr_name_kobj), GFP_KERNEL); - if (!attr_name_kobj) { - ret = -ENOMEM; - goto err_other_attr_init; - } + if (!attr_name_kobj) + return -ENOMEM; + + mutex_lock(&bioscfg_drv.mutex); /* Check if attribute type is supported */ switch (attr_type) { From f40f939917b2b4cbf18450096c0ce1c58ed59fae Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Mon, 13 Nov 2023 12:07:39 -0800 Subject: [PATCH 010/234] platform/x86: hp-bioscfg: Fix error handling in hp_add_other_attributes() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'attr_name_kobj' is allocated using kzalloc, but on all the error paths it is not freed, hence we have a memory leak. Fix the error path before kobject_init_and_add() by adding kfree(). kobject_put() must be always called after passing the object to kobject_init_and_add(). Only the error path which is immediately next to kobject_init_and_add() calls kobject_put() and not any other error path after it. Fix the error handling after kobject_init_and_add() by moving the kobject_put() into the goto label err_other_attr_init that is already used by all the error paths after kobject_init_and_add(). Fixes: a34fc329b189 ("platform/x86: hp-bioscfg: bioscfg") Cc: stable@vger.kernel.org # 6.6.x: c5dbf0416000: platform/x86: hp-bioscfg: Simplify return check in hp_add_other_attributes() Cc: stable@vger.kernel.org # 6.6.x: 5736aa9537c9: platform/x86: hp-bioscfg: move mutex_lock() down in hp_add_other_attributes() Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202309201412.on0VXJGo-lkp@intel.com/ Signed-off-by: Harshit Mogalapalli [ij: Added the stable dep tags] Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20231113200742.3593548-3-harshit.m.mogalapalli@oracle.com Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/hp/hp-bioscfg/bioscfg.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c index a3599498c4e8..6ddca857cc4d 100644 --- a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c +++ b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c @@ -613,14 +613,14 @@ static int hp_add_other_attributes(int attr_type) default: pr_err("Error: Unknown attr_type: %d\n", attr_type); ret = -EINVAL; - goto err_other_attr_init; + kfree(attr_name_kobj); + goto unlock_drv_mutex; } ret = kobject_init_and_add(attr_name_kobj, &attr_name_ktype, NULL, "%s", attr_name); if (ret) { pr_err("Error encountered [%d]\n", ret); - kobject_put(attr_name_kobj); goto err_other_attr_init; } @@ -645,6 +645,8 @@ static int hp_add_other_attributes(int attr_type) return 0; err_other_attr_init: + kobject_put(attr_name_kobj); +unlock_drv_mutex: mutex_unlock(&bioscfg_drv.mutex); kfree(obj); return ret; From 92c47597db7d8fb500a4b04ebd457ec7360279cc Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Mon, 13 Nov 2023 12:07:40 -0800 Subject: [PATCH 011/234] platform/x86: hp-bioscfg: Remove unused obj in hp_add_other_attributes() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit acpi_object *obj is unused in this function, so delete it, also delete a unnecessary kfree(obj); Signed-off-by: Harshit Mogalapalli Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20231113200742.3593548-4-harshit.m.mogalapalli@oracle.com Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/hp/hp-bioscfg/bioscfg.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c index 6ddca857cc4d..8c9f4f3227fc 100644 --- a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c +++ b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c @@ -588,7 +588,6 @@ static void release_attributes_data(void) static int hp_add_other_attributes(int attr_type) { struct kobject *attr_name_kobj; - union acpi_object *obj = NULL; int ret; char *attr_name; @@ -648,7 +647,6 @@ err_other_attr_init: kobject_put(attr_name_kobj); unlock_drv_mutex: mutex_unlock(&bioscfg_drv.mutex); - kfree(obj); return ret; } From b3e0f94d15700ac8e8c1c2355834f5d5c753c41d Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Thu, 9 Nov 2023 19:02:14 -0500 Subject: [PATCH 012/234] drm/msm/dsi: use the correct VREG_CTRL_1 value for 4nm cphy Use the same value as the downstream driver. This change is needed for CPHY mode to work correctly. Fixes: 8b034e677111 ("drm/msm/dsi: add support for DSI-PHY on SM8550") Signed-off-by: Jonathan Marek Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/566987/ Link: https://lore.kernel.org/r/20231110000216.29979-1-jonathan@marek.ca Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c index 3b1ed02f644d..89a6344bc865 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c @@ -918,7 +918,7 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, if ((phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V5_2)) { if (phy->cphy_mode) { vreg_ctrl_0 = 0x45; - vreg_ctrl_1 = 0x45; + vreg_ctrl_1 = 0x41; glbl_rescode_top_ctrl = 0x00; glbl_rescode_bot_ctrl = 0x00; } else { From 3944e343e54b93d3fef30eacc4738e77fdf5444e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 7 Nov 2023 13:14:13 +0200 Subject: [PATCH 013/234] drm/msm: remove exra drm_kms_helper_poll_init() call It seems during rebases I have left a call to drm_kms_helper_poll_init() which is not guarded by the (priv->kms_init) check. This leads to the crash for the boards which don't have KMS output. Drop this call, as there is a correctly guarded one next to the one being removed. Fixes: 506efcba3129 ("drm/msm: carve out KMS code from msm_drv.c") Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/566299/ Link: https://lore.kernel.org/r/20231107111413.2212942-1-dmitry.baryshkov@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/msm_drv.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index b61ccea05327..841f9c102b28 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -286,8 +286,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) if (ret) goto err_msm_uninit; - drm_kms_helper_poll_init(ddev); - if (priv->kms_init) { drm_kms_helper_poll_init(ddev); msm_fbdev_setup(ddev); From a33b2431d11b4df137bbcfdd5a5adfa054c2479e Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 30 Oct 2023 16:23:20 -0700 Subject: [PATCH 014/234] drm/msm/dpu: Add missing safe_lut_tbl in sc8280xp catalog During USB transfers on the SC8280XP __arm_smmu_tlb_sync() is seen to typically take 1-2ms to complete. As expected this results in poor performance, something that has been mitigated by proposing running the iommu in non-strict mode (boot with iommu.strict=0). This turns out to be related to the SAFE logic, and programming the QOS SAFE values in the DPU (per suggestion from Rob and Doug) reduces the TLB sync time to below 10us, which means significant less time spent with interrupts disabled and a significant boost in throughput. Fixes: 4a352c2fc15a ("drm/msm/dpu: Introduce SC8280XP") Cc: stable@vger.kernel.org Suggested-by: Doug Anderson Suggested-by: Rob Clark Signed-off-by: Bjorn Andersson Tested-by: Johan Hovold Tested-by: Steev Klimaszewski Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/565094/ Link: https://lore.kernel.org/r/20231030-sc8280xp-dpu-safe-lut-v1-1-6d485d7b428f@quicinc.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h index 1ccd1edd693c..4c0528794e7a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h @@ -406,6 +406,7 @@ static const struct dpu_perf_cfg sc8280xp_perf_data = { .min_llcc_ib = 0, .min_dram_ib = 800000, .danger_lut_tbl = {0xf, 0xffff, 0x0}, + .safe_lut_tbl = {0xfe00, 0xfe00, 0xffff}, .qos_lut_tbl = { {.nentry = ARRAY_SIZE(sc8180x_qos_linear), .entries = sc8180x_qos_linear From ebfa85c504cb34f2fd3257c6f5d54158a0ff1bf6 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Wed, 25 Oct 2023 12:23:09 +0300 Subject: [PATCH 015/234] drm/msm/dp: don't touch DP subconnector property in eDP case In case of the eDP connection there is no subconnetor and as such no subconnector property. Put drm_dp_set_subconnector_property() calls under the !is_edp condition. Fixes: bfcc3d8f94f4 ("drm/msm/dp: support setting the DP subconnector type") Signed-off-by: Abel Vesa Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Tested-by: Jessica Zhang # SC7280 Reviewed-by: Johan Hovold Tested-by: Johan Hovold Patchwork: https://patchwork.freedesktop.org/patch/564284/ Link: https://lore.kernel.org/r/20231025092711.851168-2-dmitry.baryshkov@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dp/dp_display.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 17bfa72727aa..165e4a51d52f 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -365,9 +365,11 @@ static int dp_display_send_hpd_notification(struct dp_display_private *dp, /* reset video pattern flag on disconnect */ if (!hpd) { dp->panel->video_test = false; - drm_dp_set_subconnector_property(dp->dp_display.connector, - connector_status_disconnected, - dp->panel->dpcd, dp->panel->downstream_ports); + if (!dp->dp_display.is_edp) + drm_dp_set_subconnector_property(dp->dp_display.connector, + connector_status_disconnected, + dp->panel->dpcd, + dp->panel->downstream_ports); } dp->dp_display.is_connected = hpd; @@ -396,8 +398,11 @@ static int dp_display_process_hpd_high(struct dp_display_private *dp) dp_link_process_request(dp->link); - drm_dp_set_subconnector_property(dp->dp_display.connector, connector_status_connected, - dp->panel->dpcd, dp->panel->downstream_ports); + if (!dp->dp_display.is_edp) + drm_dp_set_subconnector_property(dp->dp_display.connector, + connector_status_connected, + dp->panel->dpcd, + dp->panel->downstream_ports); edid = dp->panel->edid; From 21133266ca12f82b6e59c9711258cca2097c167c Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 25 Oct 2023 12:23:10 +0300 Subject: [PATCH 016/234] drm/msm/dp: attach the DP subconnector property While developing and testing the commit bfcc3d8f94f4 ("drm/msm/dp: support setting the DP subconnector type") I had the patch [1] in my tree. I haven't noticed that it was a dependency for the commit in question. Mea culpa. Since the patch has not landed yet (and even was not reviewed) and since one of the bridges erroneously uses USB connector type instead of DP, attach the property directly from the MSM DP driver. This fixes the following oops on DP HPD event: drm_object_property_set_value (drivers/gpu/drm/drm_mode_object.c:288) dp_display_process_hpd_high (drivers/gpu/drm/msm/dp/dp_display.c:402) dp_hpd_plug_handle.isra.0 (drivers/gpu/drm/msm/dp/dp_display.c:604) hpd_event_thread (drivers/gpu/drm/msm/dp/dp_display.c:1110) kthread (kernel/kthread.c:388) ret_from_fork (arch/arm64/kernel/entry.S:858) [1] https://patchwork.freedesktop.org/patch/555530/ Fixes: bfcc3d8f94f4 ("drm/msm/dp: support setting the DP subconnector type") Reviewed-by: Abhinav Kumar Signed-off-by: Dmitry Baryshkov Tested-by: Jessica Zhang # SC7280 Reviewed-by: Johan Hovold Tested-by: Johan Hovold Patchwork: https://patchwork.freedesktop.org/patch/564286/ Link: https://lore.kernel.org/r/20231025092711.851168-3-dmitry.baryshkov@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dp/dp_drm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/msm/dp/dp_drm.c b/drivers/gpu/drm/msm/dp/dp_drm.c index 40e7344180e3..e3bdd7dd4cdc 100644 --- a/drivers/gpu/drm/msm/dp/dp_drm.c +++ b/drivers/gpu/drm/msm/dp/dp_drm.c @@ -345,6 +345,9 @@ struct drm_connector *dp_drm_connector_init(struct msm_dp *dp_display, struct dr if (IS_ERR(connector)) return connector; + if (!dp_display->is_edp) + drm_connector_attach_dp_subconnector_property(connector); + drm_connector_attach_encoder(connector, encoder); return connector; From ccab434e674ca95d483788b1895a70c21b7f016a Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 15 Nov 2023 11:08:57 +0100 Subject: [PATCH 017/234] usb: aqc111: check packet for fixup for true limit If a device sends a packet that is inbetween 0 and sizeof(u64) the value passed to skb_trim() as length will wrap around ending up as some very large value. The driver will then proceed to parse the header located at that position, which will either oops or process some random value. The fix is to check against sizeof(u64) rather than 0, which the driver currently does. The issue exists since the introduction of the driver. Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller --- drivers/net/usb/aqc111.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c index a017e9de2119..7b8afa589a53 100644 --- a/drivers/net/usb/aqc111.c +++ b/drivers/net/usb/aqc111.c @@ -1079,17 +1079,17 @@ static int aqc111_rx_fixup(struct usbnet *dev, struct sk_buff *skb) u16 pkt_count = 0; u64 desc_hdr = 0; u16 vlan_tag = 0; - u32 skb_len = 0; + u32 skb_len; if (!skb) goto err; - if (skb->len == 0) + skb_len = skb->len; + if (skb_len < sizeof(desc_hdr)) goto err; - skb_len = skb->len; /* RX Descriptor Header */ - skb_trim(skb, skb->len - sizeof(desc_hdr)); + skb_trim(skb, skb_len - sizeof(desc_hdr)); desc_hdr = le64_to_cpup((u64 *)skb_tail_pointer(skb)); /* Check these packets */ From 7fbd5fc2b35a8f559a6b380dfa9bcd964a758186 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 15 Nov 2023 11:53:31 +0100 Subject: [PATCH 018/234] stmmac: dwmac-loongson: Add architecture dependency Only present the DWMAC_LOONGSON option on architectures where it can actually be used. This follows the same logic as the DWMAC_INTEL option. Signed-off-by: Jean Delvare Cc: Keguang Zhang Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index a2b9e289aa36..85dcda51df05 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -280,7 +280,7 @@ config DWMAC_INTEL config DWMAC_LOONGSON tristate "Loongson PCI DWMAC support" default MACH_LOONGSON64 - depends on STMMAC_ETH && PCI + depends on (MACH_LOONGSON64 || COMPILE_TEST) && STMMAC_ETH && PCI depends on COMMON_CLK help This selects the LOONGSON PCI bus support for the stmmac driver, From 0c3bd086d12d185650d095a906662593ec607bd0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 15 Nov 2023 17:15:40 +0000 Subject: [PATCH 019/234] rxrpc: Fix some minor issues with bundle tracing Fix some superficial issues with the tracing of rxrpc_bundle structs, including: (1) Set the debug_id when the bundle is allocated rather than when it is set up so that the "NEW" trace line displays the correct bundle ID. (2) Show the refcount when emitting the "FREE" traceline. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- net/rxrpc/conn_client.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 981ca5b98bcb..1d95f8bc769f 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -73,6 +73,7 @@ static void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local) static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call, gfp_t gfp) { + static atomic_t rxrpc_bundle_id; struct rxrpc_bundle *bundle; bundle = kzalloc(sizeof(*bundle), gfp); @@ -85,6 +86,7 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call, bundle->upgrade = test_bit(RXRPC_CALL_UPGRADE, &call->flags); bundle->service_id = call->dest_srx.srx_service; bundle->security_level = call->security_level; + bundle->debug_id = atomic_inc_return(&rxrpc_bundle_id); refcount_set(&bundle->ref, 1); atomic_set(&bundle->active, 1); INIT_LIST_HEAD(&bundle->waiting_calls); @@ -105,7 +107,8 @@ struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle, static void rxrpc_free_bundle(struct rxrpc_bundle *bundle) { - trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_free); + trace_rxrpc_bundle(bundle->debug_id, refcount_read(&bundle->ref), + rxrpc_bundle_free); rxrpc_put_peer(bundle->peer, rxrpc_peer_put_bundle); key_put(bundle->key); kfree(bundle); @@ -239,7 +242,6 @@ dont_reuse: */ int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp) { - static atomic_t rxrpc_bundle_id; struct rxrpc_bundle *bundle, *candidate; struct rxrpc_local *local = call->local; struct rb_node *p, **pp, *parent; @@ -306,7 +308,6 @@ int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp) } _debug("new bundle"); - candidate->debug_id = atomic_inc_return(&rxrpc_bundle_id); rb_link_node(&candidate->local_node, parent, pp); rb_insert_color(&candidate->local_node, &local->client_bundles); call->bundle = rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call); From d565fa4300d9ebd5ba3bbd259ce841f8dab609d6 Mon Sep 17 00:00:00 2001 From: Gerd Bayer Date: Wed, 15 Nov 2023 16:59:58 +0100 Subject: [PATCH 020/234] s390/ism: ism driver implies smc protocol Since commit a72178cfe855 ("net/smc: Fix dependency of SMC on ISM") you can build the ism code without selecting the SMC network protocol. That leaves some ism functions be reported as unused. Move these functions under the conditional compile with CONFIG_SMC. Also codify the suggestion to also configure the SMC protocol in ism's Kconfig - but with an "imply" rather than a "select" as SMC depends on other config options and allow for a deliberate decision not to build SMC. Also, mention that in ISM's help. Fixes: a72178cfe855 ("net/smc: Fix dependency of SMC on ISM") Reported-by: Randy Dunlap Closes: https://lore.kernel.org/netdev/afd142a2-1fa0-46b9-8b2d-7652d41d3ab8@infradead.org/ Signed-off-by: Gerd Bayer Reviewed-by: Wenjia Zhang Reviewed-by: Simon Horman Acked-by: Randy Dunlap Tested-by: Randy Dunlap # build-tested Signed-off-by: David S. Miller --- drivers/s390/net/Kconfig | 3 +- drivers/s390/net/ism_drv.c | 93 +++++++++++++++++++------------------- 2 files changed, 48 insertions(+), 48 deletions(-) diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig index 4902d45e929c..c61e6427384c 100644 --- a/drivers/s390/net/Kconfig +++ b/drivers/s390/net/Kconfig @@ -103,10 +103,11 @@ config CCWGROUP config ISM tristate "Support for ISM vPCI Adapter" depends on PCI + imply SMC default n help Select this option if you want to use the Internal Shared Memory - vPCI Adapter. + vPCI Adapter. The adapter can be used with the SMC network protocol. To compile as a module choose M. The module name is ism. If unsure, choose N. diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index 6df7f377d2f9..81aabbfbbe2c 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -30,7 +30,6 @@ static const struct pci_device_id ism_device_table[] = { MODULE_DEVICE_TABLE(pci, ism_device_table); static debug_info_t *ism_debug_info; -static const struct smcd_ops ism_ops; #define NO_CLIENT 0xff /* must be >= MAX_CLIENTS */ static struct ism_client *clients[MAX_CLIENTS]; /* use an array rather than */ @@ -289,22 +288,6 @@ out: return ret; } -static int ism_query_rgid(struct ism_dev *ism, u64 rgid, u32 vid_valid, - u32 vid) -{ - union ism_query_rgid cmd; - - memset(&cmd, 0, sizeof(cmd)); - cmd.request.hdr.cmd = ISM_QUERY_RGID; - cmd.request.hdr.len = sizeof(cmd.request); - - cmd.request.rgid = rgid; - cmd.request.vlan_valid = vid_valid; - cmd.request.vlan_id = vid; - - return ism_cmd(ism, &cmd); -} - static void ism_free_dmb(struct ism_dev *ism, struct ism_dmb *dmb) { clear_bit(dmb->sba_idx, ism->sba_bitmap); @@ -429,23 +412,6 @@ static int ism_del_vlan_id(struct ism_dev *ism, u64 vlan_id) return ism_cmd(ism, &cmd); } -static int ism_signal_ieq(struct ism_dev *ism, u64 rgid, u32 trigger_irq, - u32 event_code, u64 info) -{ - union ism_sig_ieq cmd; - - memset(&cmd, 0, sizeof(cmd)); - cmd.request.hdr.cmd = ISM_SIGNAL_IEQ; - cmd.request.hdr.len = sizeof(cmd.request); - - cmd.request.rgid = rgid; - cmd.request.trigger_irq = trigger_irq; - cmd.request.event_code = event_code; - cmd.request.info = info; - - return ism_cmd(ism, &cmd); -} - static unsigned int max_bytes(unsigned int start, unsigned int len, unsigned int boundary) { @@ -503,14 +469,6 @@ u8 *ism_get_seid(void) } EXPORT_SYMBOL_GPL(ism_get_seid); -static u16 ism_get_chid(struct ism_dev *ism) -{ - if (!ism || !ism->pdev) - return 0; - - return to_zpci(ism->pdev)->pchid; -} - static void ism_handle_event(struct ism_dev *ism) { struct ism_event *entry; @@ -569,11 +527,6 @@ static irqreturn_t ism_handle_irq(int irq, void *data) return IRQ_HANDLED; } -static u64 ism_get_local_gid(struct ism_dev *ism) -{ - return ism->local_gid; -} - static int ism_dev_init(struct ism_dev *ism) { struct pci_dev *pdev = ism->pdev; @@ -774,6 +727,22 @@ module_exit(ism_exit); /*************************** SMC-D Implementation *****************************/ #if IS_ENABLED(CONFIG_SMC) +static int ism_query_rgid(struct ism_dev *ism, u64 rgid, u32 vid_valid, + u32 vid) +{ + union ism_query_rgid cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.request.hdr.cmd = ISM_QUERY_RGID; + cmd.request.hdr.len = sizeof(cmd.request); + + cmd.request.rgid = rgid; + cmd.request.vlan_valid = vid_valid; + cmd.request.vlan_id = vid; + + return ism_cmd(ism, &cmd); +} + static int smcd_query_rgid(struct smcd_dev *smcd, u64 rgid, u32 vid_valid, u32 vid) { @@ -811,6 +780,23 @@ static int smcd_reset_vlan_required(struct smcd_dev *smcd) return ism_cmd_simple(smcd->priv, ISM_RESET_VLAN); } +static int ism_signal_ieq(struct ism_dev *ism, u64 rgid, u32 trigger_irq, + u32 event_code, u64 info) +{ + union ism_sig_ieq cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.request.hdr.cmd = ISM_SIGNAL_IEQ; + cmd.request.hdr.len = sizeof(cmd.request); + + cmd.request.rgid = rgid; + cmd.request.trigger_irq = trigger_irq; + cmd.request.event_code = event_code; + cmd.request.info = info; + + return ism_cmd(ism, &cmd); +} + static int smcd_signal_ieq(struct smcd_dev *smcd, u64 rgid, u32 trigger_irq, u32 event_code, u64 info) { @@ -830,11 +816,24 @@ static int smcd_supports_v2(void) SYSTEM_EID.type[0] != '0'; } +static u64 ism_get_local_gid(struct ism_dev *ism) +{ + return ism->local_gid; +} + static u64 smcd_get_local_gid(struct smcd_dev *smcd) { return ism_get_local_gid(smcd->priv); } +static u16 ism_get_chid(struct ism_dev *ism) +{ + if (!ism || !ism->pdev) + return 0; + + return to_zpci(ism->pdev)->pchid; +} + static u16 smcd_get_chid(struct smcd_dev *smcd) { return ism_get_chid(smcd->priv); From 75a50c4f5b957a34dedb7c8cce52b582a9162828 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 16 Nov 2023 18:01:41 +0100 Subject: [PATCH 021/234] kselftest: rtnetlink: fix ip route command typo The blamed commit below introduced a typo causing 'gretap' test-case failures: ./rtnetlink.sh -t kci_test_gretap -v COMMAND: ip link add name test-dummy0 type dummy COMMAND: ip link set test-dummy0 up COMMAND: ip netns add testns COMMAND: ip link help gretap 2>&1 | grep -q '^Usage:' COMMAND: ip -netns testns link add dev gretap00 type gretap seq key 102 local 172.16.1.100 remote 172.16.1.200 COMMAND: ip -netns testns addr add dev gretap00 10.1.1.100/24 COMMAND: ip -netns testns link set dev gretap00 ups Error: either "dev" is duplicate, or "ups" is a garbage. COMMAND: ip -netns testns link del gretap00 COMMAND: ip -netns testns link add dev gretap00 type gretap external COMMAND: ip -netns testns link del gretap00 FAIL: gretap Fix it by using the correct keyword. Fixes: 9c2a19f71515 ("kselftest: rtnetlink.sh: add verbose flag") Signed-off-by: Paolo Abeni Reviewed-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/net/rtnetlink.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 5f2b3f6c0d74..38be9706c45f 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -859,7 +859,7 @@ kci_test_gretap() run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 - run_cmd ip -netns "$testns" link set dev $DEV_NS ups + run_cmd ip -netns "$testns" link set dev $DEV_NS up run_cmd ip -netns "$testns" link del "$DEV_NS" # test external mode From 3798680f2fbbe0ca3ab6138b34e0d161c36497ee Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Nov 2023 13:12:58 +0000 Subject: [PATCH 022/234] rxrpc: Fix RTT determination to use any ACK as a source Fix RTT determination to be able to use any type of ACK as the response from which RTT can be calculated provided its ack.serial is non-zero and matches the serial number of an outgoing DATA or ACK packet. This shouldn't be limited to REQUESTED-type ACKs as these can have other types substituted for them for things like duplicate or out-of-order packets. Fixes: 4700c4d80b7b ("rxrpc: Fix loss of RTT samples due to interposed ACK") Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- include/trace/events/rxrpc.h | 2 +- net/rxrpc/input.c | 35 ++++++++++++++++------------------- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 4c53a5ef6257..f7e537f64db4 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -328,7 +328,7 @@ E_(rxrpc_rtt_tx_ping, "PING") #define rxrpc_rtt_rx_traces \ - EM(rxrpc_rtt_rx_cancel, "CNCL") \ + EM(rxrpc_rtt_rx_other_ack, "OACK") \ EM(rxrpc_rtt_rx_obsolete, "OBSL") \ EM(rxrpc_rtt_rx_lost, "LOST") \ EM(rxrpc_rtt_rx_ping_response, "PONG") \ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 030d64f282f3..3f9594d12519 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -643,12 +643,8 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call, clear_bit(i + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail); smp_mb(); /* Read data before setting avail bit */ set_bit(i, &call->rtt_avail); - if (type != rxrpc_rtt_rx_cancel) - rxrpc_peer_add_rtt(call, type, i, acked_serial, ack_serial, - sent_at, resp_time); - else - trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_cancel, i, - orig_serial, acked_serial, 0, 0); + rxrpc_peer_add_rtt(call, type, i, acked_serial, ack_serial, + sent_at, resp_time); matched = true; } @@ -801,20 +797,21 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) summary.ack_reason, nr_acks); rxrpc_inc_stat(call->rxnet, stat_rx_acks[ack.reason]); - switch (ack.reason) { - case RXRPC_ACK_PING_RESPONSE: - rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, - rxrpc_rtt_rx_ping_response); - break; - case RXRPC_ACK_REQUESTED: - rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, - rxrpc_rtt_rx_requested_ack); - break; - default: - if (acked_serial != 0) + if (acked_serial != 0) { + switch (ack.reason) { + case RXRPC_ACK_PING_RESPONSE: rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, - rxrpc_rtt_rx_cancel); - break; + rxrpc_rtt_rx_ping_response); + break; + case RXRPC_ACK_REQUESTED: + rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, + rxrpc_rtt_rx_requested_ack); + break; + default: + rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, + rxrpc_rtt_rx_other_ack); + break; + } } if (ack.reason == RXRPC_ACK_PING) { From 1a01319feef7047aa2ba400ffa3e047776aa29ca Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Nov 2023 13:12:59 +0000 Subject: [PATCH 023/234] rxrpc: Defer the response to a PING ACK until we've parsed it Defer the generation of a PING RESPONSE ACK in response to a PING ACK until we've parsed the PING ACK so that we pick up any changes to the packet queue so that we can update ackinfo. This is also applied to an ACK generated in response to an ACK with the REQUEST_ACK flag set. Note that whilst the problem was added in commit 248f219cb8bc, it didn't really matter at that point because the ACK was proposed in softirq mode and generated asynchronously later in process context, taking the latest values at the time. But this fix is only needed since the move to parse incoming packets in an I/O thread rather than in softirq and generate the ACK at point of proposal (b0346843b1076b34a0278ff601f8f287535cb064). Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- net/rxrpc/input.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 3f9594d12519..92495e73b869 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -814,14 +814,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) } } - if (ack.reason == RXRPC_ACK_PING) { - rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, ack_serial, - rxrpc_propose_ack_respond_to_ping); - } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) { - rxrpc_send_ACK(call, RXRPC_ACK_REQUESTED, ack_serial, - rxrpc_propose_ack_respond_to_ack); - } - /* If we get an EXCEEDS_WINDOW ACK from the server, it probably * indicates that the client address changed due to NAT. The server * lost the call because it switched to a different peer. @@ -832,7 +824,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_is_client_call(call)) { rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, 0, -ENETRESET); - return; + goto send_response; } /* If we get an OUT_OF_SEQUENCE ACK from the server, that can also @@ -846,7 +838,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_is_client_call(call)) { rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, 0, -ENETRESET); - return; + goto send_response; } /* Discard any out-of-order or duplicate ACKs (outside lock). */ @@ -854,7 +846,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial, first_soft_ack, call->acks_first_seq, prev_pkt, call->acks_prev_seq); - return; + goto send_response; } info.rxMTU = 0; @@ -894,7 +886,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) case RXRPC_CALL_SERVER_AWAIT_ACK: break; default: - return; + goto send_response; } if (before(hard_ack, call->acks_hard_ack) || @@ -906,7 +898,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (after(hard_ack, call->acks_hard_ack)) { if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) { rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ack); - return; + goto send_response; } } @@ -924,6 +916,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_propose_ack_ping_for_lost_reply); rxrpc_congestion_management(call, skb, &summary, acked_serial); + +send_response: + if (ack.reason == RXRPC_ACK_PING) + rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, ack_serial, + rxrpc_propose_ack_respond_to_ping); + else if (sp->hdr.flags & RXRPC_REQUEST_ACK) + rxrpc_send_ACK(call, RXRPC_ACK_REQUESTED, ack_serial, + rxrpc_propose_ack_respond_to_ack); } /* From 76df934c6d5f5c93ba7a0112b1818620ddc10b19 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 16 Nov 2023 12:11:51 -0800 Subject: [PATCH 024/234] MAINTAINERS: Add netdev subsystem profile link The netdev subsystem has had a subsystem process document for a while now. Link it appropriately in MAINTAINERS with the P: tag. Cc: Jakub Kicinski Cc: "David S. Miller" Cc: Eric Dumazet Cc: Paolo Abeni Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 482d428472e7..80a95878c61d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14994,6 +14994,7 @@ M: Jakub Kicinski M: Paolo Abeni L: netdev@vger.kernel.org S: Maintained +P: Documentation/process/maintainer-netdev.rst Q: https://patchwork.kernel.org/project/netdevbpf/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git @@ -15045,6 +15046,7 @@ M: Jakub Kicinski M: Paolo Abeni L: netdev@vger.kernel.org S: Maintained +P: Documentation/process/maintainer-netdev.rst Q: https://patchwork.kernel.org/project/netdevbpf/list/ B: mailto:netdev@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git From e6bace7313d61e31f2b16fa3d774fd8cb3cb869e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2023 16:26:59 +0000 Subject: [PATCH 025/234] afs: Fix afs_server_list to be cleaned up with RCU afs_server_list is accessed with the rcu_read_lock() held from volume->servers, so it needs to be cleaned up correctly. Fix this by using kfree_rcu() instead of kfree(). Fixes: 8a070a964877 ("afs: Detect cell aliases 1 - Cells with root volumes") Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- fs/afs/internal.h | 1 + fs/afs/server_list.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index c9cef3782b4a..a812952be1c9 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -553,6 +553,7 @@ struct afs_server_entry { }; struct afs_server_list { + struct rcu_head rcu; afs_volid_t vids[AFS_MAXTYPES]; /* Volume IDs */ refcount_t usage; unsigned char nr_servers; diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c index ed9056703505..b59896b1de0a 100644 --- a/fs/afs/server_list.c +++ b/fs/afs/server_list.c @@ -17,7 +17,7 @@ void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist) for (i = 0; i < slist->nr_servers; i++) afs_unuse_server(net, slist->servers[i].server, afs_server_trace_put_slist); - kfree(slist); + kfree_rcu(slist, rcu); } } From 2a4ca1b4b77850544408595e2433f5d7811a9daa Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Jun 2023 09:43:54 +0100 Subject: [PATCH 026/234] afs: Make error on cell lookup failure consistent with OpenAFS When kafs tries to look up a cell in the DNS or the local config, it will translate a lookup failure into EDESTADDRREQ whereas OpenAFS translates it into ENOENT. Applications such as West expect the latter behaviour and fail if they see the former. This can be seen by trying to mount an unknown cell: # mount -t afs %example.com:cell.root /mnt mount: /mnt: mount(2) system call failed: Destination address required. Fixes: 4d673da14533 ("afs: Support the AFS dynamic root") Reported-by: Markus Suvanto Link: https://bugzilla.kernel.org/show_bug.cgi?id=216637 Signed-off-by: David Howells Reviewed-by: Jeffrey Altman cc: Marc Dionne cc: linux-afs@lists.infradead.org --- fs/afs/dynroot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 4d04ef2d3ae7..1fa8cf23bd36 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -132,8 +132,8 @@ static int afs_probe_cell_name(struct dentry *dentry) ret = dns_query(net->net, "afsdb", name, len, "srv=1", NULL, NULL, false); - if (ret == -ENODATA) - ret = -EDESTADDRREQ; + if (ret == -ENODATA || ret == -ENOKEY) + ret = -ENOENT; return ret; } From bff2a2d453a1b683378b4508b86b84389f551a00 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 6 Nov 2023 18:12:30 +0100 Subject: [PATCH 027/234] swiotlb-xen: provide the "max_mapping_size" method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's a bug that when using the XEN hypervisor with bios with large multi-page bio vectors on NVMe, the kernel deadlocks [1]. The deadlocks are caused by inability to map a large bio vector - dma_map_sgtable always returns an error, this gets propagated to the block layer as BLK_STS_RESOURCE and the block layer retries the request indefinitely. XEN uses the swiotlb framework to map discontiguous pages into contiguous runs that are submitted to the PCIe device. The swiotlb framework has a limitation on the length of a mapping - this needs to be announced with the max_mapping_size method to make sure that the hardware drivers do not create larger mappings. Without max_mapping_size, the NVMe block driver would create large mappings that overrun the maximum mapping size. Reported-by: Marek Marczykowski-Górecki Link: https://lore.kernel.org/stable/ZTNH0qtmint%2FzLJZ@mail-itl/ [1] Tested-by: Marek Marczykowski-Górecki Suggested-by: Christoph Hellwig Cc: stable@vger.kernel.org Signed-off-by: Keith Busch Signed-off-by: Mikulas Patocka Acked-by: Stefano Stabellini Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/151bef41-e817-aea9-675-a35fdac4ed@redhat.com Signed-off-by: Juergen Gross --- drivers/xen/swiotlb-xen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 946bd56f0ac5..0e6c6c25d154 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -405,4 +405,5 @@ const struct dma_map_ops xen_swiotlb_dma_ops = { .get_sgtable = dma_common_get_sgtable, .alloc_pages = dma_common_alloc_pages, .free_pages = dma_common_free_pages, + .max_mapping_size = swiotlb_max_mapping_size, }; From 295b202227e98edb2fb5cc29b6ec4b96b2792d9c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 16 Nov 2023 12:54:59 -0600 Subject: [PATCH 028/234] xen: privcmd: Replace zero-length array with flex-array member and use __counted_by Fake flexible arrays (zero-length and one-element arrays) are deprecated, and should be replaced by flexible-array members. So, replace zero-length array with a flexible-array member in `struct privcmd_kernel_ioreq`. Also annotate array `ports` with `__counted_by()` to prepare for the coming implementation by GCC and Clang of the `__counted_by` attribute. Flexible array members annotated with `__counted_by` can have their accesses bounds-checked at run-time via `CONFIG_UBSAN_BOUNDS` (for array indexing) and `CONFIG_FORTIFY_SOURCE` (for strcpy/memcpy-family functions). This fixes multiple -Warray-bounds warnings: drivers/xen/privcmd.c:1239:30: warning: array subscript i is outside array bounds of 'struct ioreq_port[0]' [-Warray-bounds=] drivers/xen/privcmd.c:1240:30: warning: array subscript i is outside array bounds of 'struct ioreq_port[0]' [-Warray-bounds=] drivers/xen/privcmd.c:1241:30: warning: array subscript i is outside array bounds of 'struct ioreq_port[0]' [-Warray-bounds=] drivers/xen/privcmd.c:1245:33: warning: array subscript i is outside array bounds of 'struct ioreq_port[0]' [-Warray-bounds=] drivers/xen/privcmd.c:1258:67: warning: array subscript i is outside array bounds of 'struct ioreq_port[0]' [-Warray-bounds=] This results in no differences in binary output. Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/ZVZlg3tPMPCRdteh@work Signed-off-by: Juergen Gross --- drivers/xen/privcmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 1ce7f3c7a950..0eb337a8ec0f 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -1115,7 +1115,7 @@ struct privcmd_kernel_ioreq { spinlock_t lock; /* Protects ioeventfds list */ struct list_head ioeventfds; struct list_head list; - struct ioreq_port ports[0]; + struct ioreq_port ports[] __counted_by(vcpus); }; static irqreturn_t ioeventfd_interrupt(int irq, void *dev_id) From c6ea14d557343cd3af6c6be2f5a78c98bdb281bb Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Thu, 16 Nov 2023 22:31:21 +0530 Subject: [PATCH 029/234] platform/x86/amd/pmc: adjust getting DRAM size behavior MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit amd_pmc_get_dram_size() is used to get the DRAM size information. But in the current code, mailbox command to get the DRAM size info is sent based on the values of dev->major and dev->minor. But dev->major and dev->minor will have either junk or zero assigned to them until at least once a call to amd_pmc_get_smu_version() is made which ideally populates dev->major and dev->minor. However, adding a amd_pmc_get_smu_version() call to amd_pmc_get_dram_size() has a downside of elevating the boot times. After talking to the PMFW team, it's understood that the "get dram size" mbox command would only be supported on specific platforms (like Mendocino) and not all. So, adjust getting DRAM size behavior such that, - if running on Rembrandt or Mendocino and the underlying PMFW knows how to execute the "get dram size" command it shall give the custom dram size. - if the underlying FW does not report the dram size, we just proceed further and assign the default dram size. The simplest way to address this is to remove amd_pmc_get_dram_size() function and directly call the "get dram size" command in the amd_pmc_s2d_init(). Reported-by: Mark Hasemeyer Fixes: be8325fb3d8c ("platform/x86/amd: pmc: Get STB DRAM size from PMFW") Cc: stable@vger.kernel.org Suggested-by: Sanket Goswami Signed-off-by: Shyam Sundar S K Link: https://lore.kernel.org/r/20231116170121.3372222-1-Shyam-sundar.S-k@amd.com Reviewed-by: Mario Limonciello Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmc/pmc.c | 31 ++---------------------------- 1 file changed, 2 insertions(+), 29 deletions(-) diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c index cd6ac04c1468..c3104714b480 100644 --- a/drivers/platform/x86/amd/pmc/pmc.c +++ b/drivers/platform/x86/amd/pmc/pmc.c @@ -964,33 +964,6 @@ static const struct pci_device_id pmc_pci_ids[] = { { } }; -static int amd_pmc_get_dram_size(struct amd_pmc_dev *dev) -{ - int ret; - - switch (dev->cpu_id) { - case AMD_CPU_ID_YC: - if (!(dev->major > 90 || (dev->major == 90 && dev->minor > 39))) { - ret = -EINVAL; - goto err_dram_size; - } - break; - default: - ret = -EINVAL; - goto err_dram_size; - } - - ret = amd_pmc_send_cmd(dev, S2D_DRAM_SIZE, &dev->dram_size, dev->s2d_msg_id, true); - if (ret || !dev->dram_size) - goto err_dram_size; - - return 0; - -err_dram_size: - dev_err(dev->dev, "DRAM size command not supported for this platform\n"); - return ret; -} - static int amd_pmc_s2d_init(struct amd_pmc_dev *dev) { u32 phys_addr_low, phys_addr_hi; @@ -1009,8 +982,8 @@ static int amd_pmc_s2d_init(struct amd_pmc_dev *dev) return -EIO; /* Get DRAM size */ - ret = amd_pmc_get_dram_size(dev); - if (ret) + ret = amd_pmc_send_cmd(dev, S2D_DRAM_SIZE, &dev->dram_size, dev->s2d_msg_id, true); + if (ret || !dev->dram_size) dev->dram_size = S2D_TELEMETRY_DRAMBYTES_MAX; /* Get STB DRAM address */ From 24d85bb3be373b5831699bddf698b392bd2b904d Mon Sep 17 00:00:00 2001 From: Gil Fine Date: Tue, 7 Nov 2023 12:22:40 +0200 Subject: [PATCH 030/234] thunderbolt: Set lane bonding bit only for downstream port Fix the lane bonding procedure to follow the steps described in USB4 Connection Manager guide. Hence, set the lane bonding bit only for downstream port. This is needed for certain ASMedia device, otherwise lane bonding fails and the device disconnects. Cc: stable@vger.kernel.org Signed-off-by: Gil Fine Signed-off-by: Mika Westerberg --- drivers/thunderbolt/switch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 1e15ffa79295..9e5cc285cc8d 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -1143,7 +1143,7 @@ int tb_port_lane_bonding_enable(struct tb_port *port) * Only set bonding if the link was not already bonded. This * avoids the lane adapter to re-enter bonding state. */ - if (width == TB_LINK_WIDTH_SINGLE) { + if (width == TB_LINK_WIDTH_SINGLE && !tb_is_upstream_port(port)) { ret = tb_port_set_lane_bonding(port, true); if (ret) goto err_lane1; From 5391bcfa56c79a891734e4d22aa0ca3217b86491 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 7 Nov 2023 14:34:27 +0200 Subject: [PATCH 031/234] thunderbolt: Send uevent after asymmetric/symmetric switch We should send uevent to userspace whenever the link speed or width changes but tb_switch_asym_enable() and tb_switch_asym_disable() set the sw->link_width already so tb_switch_update_link_attributes() never noticed the change. Fix this so that we let tb_switch_update_link_attributes() update the fields accordingly. Fixes: 81af2952e606 ("thunderbolt: Add support for asymmetric link") Reported-by: Pengfei Xu Tested-by: Pengfei Xu Signed-off-by: Mika Westerberg --- drivers/thunderbolt/switch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 9e5cc285cc8d..44e9b09de47a 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -2880,6 +2880,7 @@ static int tb_switch_lane_bonding_disable(struct tb_switch *sw) return tb_port_wait_for_link_width(down, TB_LINK_WIDTH_SINGLE, 100); } +/* Note updating sw->link_width done in tb_switch_update_link_attributes() */ static int tb_switch_asym_enable(struct tb_switch *sw, enum tb_link_width width) { struct tb_port *up, *down, *port; @@ -2919,10 +2920,10 @@ static int tb_switch_asym_enable(struct tb_switch *sw, enum tb_link_width width) return ret; } - sw->link_width = width; return 0; } +/* Note updating sw->link_width done in tb_switch_update_link_attributes() */ static int tb_switch_asym_disable(struct tb_switch *sw) { struct tb_port *up, *down; @@ -2957,7 +2958,6 @@ static int tb_switch_asym_disable(struct tb_switch *sw) return ret; } - sw->link_width = TB_LINK_WIDTH_DUAL; return 0; } From 480713b1ba8eac4617936f8404da34bda991c30e Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 13 Nov 2023 12:49:13 +0200 Subject: [PATCH 032/234] thunderbolt: Only add device router DP IN to the head of the DP resource list When pairing DP IN and DP OUT adapters for DisplayPort tunneling, we should prioritize the possible external GPU DP IN adapters to take advantage of the its capabilities. However the commit in question did this for host router DP IN adapters too and that changes ordering of the initial DP IN resources in such way that resuming from suspend may end up using different resource and that may confuse the user. Fix this so that we only put DP IN adapters of device routers to the top of the resource list and leave host routers as is. Fixes: 274baf695b08 ("thunderbolt: Add DP IN added last in the head of the list of DP resources") Reported-by: Pengfei Xu Signed-off-by: Mika Westerberg --- drivers/thunderbolt/tb.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index 5acdeb766860..fd49f86e0353 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -213,7 +213,17 @@ static void tb_add_dp_resources(struct tb_switch *sw) if (!tb_switch_query_dp_resource(sw, port)) continue; - list_add(&port->list, &tcm->dp_resources); + /* + * If DP IN on device router exist, position it at the + * beginning of the DP resources list, so that it is used + * before DP IN of the host router. This way external GPU(s) + * will be prioritized when pairing DP IN to a DP OUT. + */ + if (tb_route(sw)) + list_add(&port->list, &tcm->dp_resources); + else + list_add_tail(&port->list, &tcm->dp_resources); + tb_port_dbg(port, "DP IN resource available\n"); } } From 914fa861e3d7803c9bbafc229652c2a69edb8b60 Mon Sep 17 00:00:00 2001 From: Ferry Meng Date: Thu, 9 Nov 2023 19:18:22 +0800 Subject: [PATCH 033/234] erofs: simplify erofs_read_inode() After commit 1c7f49a76773 ("erofs: tidy up EROFS on-disk naming"), there is a unique `union erofs_inode_i_u` so that we could parse the union directly. Besides, it also replaces `inode->i_sb` with `sb` for simplicity. Signed-off-by: Ferry Meng Reviewed-by: Gao Xiang Reviewed-by: Yue Hu Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20231109111822.17944-1-mengferry@linux.alibaba.com Signed-off-by: Gao Xiang --- fs/erofs/inode.c | 98 +++++++++++++++++------------------------------- 1 file changed, 35 insertions(+), 63 deletions(-) diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index b8ad05b4509d..14a79d3226ab 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -15,11 +15,11 @@ static void *erofs_read_inode(struct erofs_buf *buf, struct erofs_sb_info *sbi = EROFS_SB(sb); struct erofs_inode *vi = EROFS_I(inode); const erofs_off_t inode_loc = erofs_iloc(inode); - erofs_blk_t blkaddr, nblks = 0; void *kaddr; struct erofs_inode_compact *dic; struct erofs_inode_extended *die, *copied = NULL; + union erofs_inode_i_u iu; unsigned int ifmt; int err; @@ -35,9 +35,8 @@ static void *erofs_read_inode(struct erofs_buf *buf, dic = kaddr + *ofs; ifmt = le16_to_cpu(dic->i_format); - if (ifmt & ~EROFS_I_ALL) { - erofs_err(inode->i_sb, "unsupported i_format %u of nid %llu", + erofs_err(sb, "unsupported i_format %u of nid %llu", ifmt, vi->nid); err = -EOPNOTSUPP; goto err_out; @@ -45,7 +44,7 @@ static void *erofs_read_inode(struct erofs_buf *buf, vi->datalayout = erofs_inode_datalayout(ifmt); if (vi->datalayout >= EROFS_INODE_DATALAYOUT_MAX) { - erofs_err(inode->i_sb, "unsupported datalayout %u of nid %llu", + erofs_err(sb, "unsupported datalayout %u of nid %llu", vi->datalayout, vi->nid); err = -EOPNOTSUPP; goto err_out; @@ -82,40 +81,15 @@ static void *erofs_read_inode(struct erofs_buf *buf, vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount); inode->i_mode = le16_to_cpu(die->i_mode); - switch (inode->i_mode & S_IFMT) { - case S_IFREG: - case S_IFDIR: - case S_IFLNK: - vi->raw_blkaddr = le32_to_cpu(die->i_u.raw_blkaddr); - break; - case S_IFCHR: - case S_IFBLK: - inode->i_rdev = - new_decode_dev(le32_to_cpu(die->i_u.rdev)); - break; - case S_IFIFO: - case S_IFSOCK: - inode->i_rdev = 0; - break; - default: - goto bogusimode; - } + iu = die->i_u; i_uid_write(inode, le32_to_cpu(die->i_uid)); i_gid_write(inode, le32_to_cpu(die->i_gid)); set_nlink(inode, le32_to_cpu(die->i_nlink)); - - /* extended inode has its own timestamp */ + /* each extended inode has its own timestamp */ inode_set_ctime(inode, le64_to_cpu(die->i_mtime), le32_to_cpu(die->i_mtime_nsec)); inode->i_size = le64_to_cpu(die->i_size); - - /* total blocks for compressed files */ - if (erofs_inode_is_data_compressed(vi->datalayout)) - nblks = le32_to_cpu(die->i_u.compressed_blocks); - else if (vi->datalayout == EROFS_INODE_CHUNK_BASED) - /* fill chunked inode summary info */ - vi->chunkformat = le16_to_cpu(die->i_u.c.format); kfree(copied); copied = NULL; break; @@ -125,49 +99,51 @@ static void *erofs_read_inode(struct erofs_buf *buf, vi->xattr_isize = erofs_xattr_ibody_size(dic->i_xattr_icount); inode->i_mode = le16_to_cpu(dic->i_mode); - switch (inode->i_mode & S_IFMT) { - case S_IFREG: - case S_IFDIR: - case S_IFLNK: - vi->raw_blkaddr = le32_to_cpu(dic->i_u.raw_blkaddr); - break; - case S_IFCHR: - case S_IFBLK: - inode->i_rdev = - new_decode_dev(le32_to_cpu(dic->i_u.rdev)); - break; - case S_IFIFO: - case S_IFSOCK: - inode->i_rdev = 0; - break; - default: - goto bogusimode; - } + iu = dic->i_u; i_uid_write(inode, le16_to_cpu(dic->i_uid)); i_gid_write(inode, le16_to_cpu(dic->i_gid)); set_nlink(inode, le16_to_cpu(dic->i_nlink)); - /* use build time for compact inodes */ inode_set_ctime(inode, sbi->build_time, sbi->build_time_nsec); inode->i_size = le32_to_cpu(dic->i_size); - if (erofs_inode_is_data_compressed(vi->datalayout)) - nblks = le32_to_cpu(dic->i_u.compressed_blocks); - else if (vi->datalayout == EROFS_INODE_CHUNK_BASED) - vi->chunkformat = le16_to_cpu(dic->i_u.c.format); break; default: - erofs_err(inode->i_sb, - "unsupported on-disk inode version %u of nid %llu", + erofs_err(sb, "unsupported on-disk inode version %u of nid %llu", erofs_inode_version(ifmt), vi->nid); err = -EOPNOTSUPP; goto err_out; } - if (vi->datalayout == EROFS_INODE_CHUNK_BASED) { + switch (inode->i_mode & S_IFMT) { + case S_IFREG: + case S_IFDIR: + case S_IFLNK: + vi->raw_blkaddr = le32_to_cpu(iu.raw_blkaddr); + break; + case S_IFCHR: + case S_IFBLK: + inode->i_rdev = new_decode_dev(le32_to_cpu(iu.rdev)); + break; + case S_IFIFO: + case S_IFSOCK: + inode->i_rdev = 0; + break; + default: + erofs_err(sb, "bogus i_mode (%o) @ nid %llu", inode->i_mode, + vi->nid); + err = -EFSCORRUPTED; + goto err_out; + } + + /* total blocks for compressed files */ + if (erofs_inode_is_data_compressed(vi->datalayout)) { + nblks = le32_to_cpu(iu.compressed_blocks); + } else if (vi->datalayout == EROFS_INODE_CHUNK_BASED) { + /* fill chunked inode summary info */ + vi->chunkformat = le16_to_cpu(iu.c.format); if (vi->chunkformat & ~EROFS_CHUNK_FORMAT_ALL) { - erofs_err(inode->i_sb, - "unsupported chunk format %x of nid %llu", + erofs_err(sb, "unsupported chunk format %x of nid %llu", vi->chunkformat, vi->nid); err = -EOPNOTSUPP; goto err_out; @@ -191,10 +167,6 @@ static void *erofs_read_inode(struct erofs_buf *buf, inode->i_blocks = nblks << (sb->s_blocksize_bits - 9); return kaddr; -bogusimode: - erofs_err(inode->i_sb, "bogus i_mode (%o) @ nid %llu", - inode->i_mode, vi->nid); - err = -EFSCORRUPTED; err_out: DBG_BUGON(1); kfree(copied); From 8bd90b6ae7856dd5000b75691d905b39b9ea5d6b Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Tue, 14 Nov 2023 15:07:04 +0800 Subject: [PATCH 034/234] erofs: fix NULL dereference of dif->bdev_handle in fscache mode Avoid NULL dereference of dif->bdev_handle, as dif->bdev_handle is NULL in fscache mode. BUG: kernel NULL pointer dereference, address: 0000000000000000 RIP: 0010:erofs_map_dev+0xbd/0x1c0 Call Trace: erofs_fscache_data_read_slice+0xa7/0x340 erofs_fscache_data_read+0x11/0x30 erofs_fscache_readahead+0xd9/0x100 read_pages+0x47/0x1f0 page_cache_ra_order+0x1e5/0x270 filemap_get_pages+0xf2/0x5f0 filemap_read+0xb8/0x2e0 vfs_read+0x18d/0x2b0 ksys_read+0x53/0xd0 do_syscall_64+0x42/0xf0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 Reported-by: Yiqun Leng Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=7245 Fixes: 49845720080d ("erofs: Convert to use bdev_open_by_path()") Signed-off-by: Jingbo Xu Reviewed-by: Gao Xiang Reviewed-by: Yue Hu Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20231114070704.23398-1-jefflexu@linux.alibaba.com Signed-off-by: Gao Xiang --- fs/erofs/data.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 029c761670bf..c98aeda8abb2 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -220,7 +220,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) up_read(&devs->rwsem); return 0; } - map->m_bdev = dif->bdev_handle->bdev; + map->m_bdev = dif->bdev_handle ? dif->bdev_handle->bdev : NULL; map->m_daxdev = dif->dax_dev; map->m_dax_part_off = dif->dax_part_off; map->m_fscache = dif->fscache; @@ -238,7 +238,8 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) if (map->m_pa >= startoff && map->m_pa < startoff + length) { map->m_pa -= startoff; - map->m_bdev = dif->bdev_handle->bdev; + map->m_bdev = dif->bdev_handle ? + dif->bdev_handle->bdev : NULL; map->m_daxdev = dif->dax_dev; map->m_dax_part_off = dif->dax_part_off; map->m_fscache = dif->fscache; From 62b241efff99fc4d88a86f1c67c7516e31f432a3 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Fri, 17 Nov 2023 16:53:29 +0800 Subject: [PATCH 035/234] MAINTAINERS: erofs: add EROFS webpage Add a new `W:` field of the EROFS entry points to the documentation site at . In addition, update the in-tree documentation and Kconfig too. Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20231117085329.1624223-1-hsiangkao@linux.alibaba.com --- Documentation/filesystems/erofs.rst | 4 ++++ MAINTAINERS | 1 + fs/erofs/Kconfig | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst index 57c6ae23b3fc..cc4626d6ee4f 100644 --- a/Documentation/filesystems/erofs.rst +++ b/Documentation/filesystems/erofs.rst @@ -91,6 +91,10 @@ compatibility checking tool (fsck.erofs), and a debugging tool (dump.erofs): - git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git +For more information, please also refer to the documentation site: + +- https://erofs.docs.kernel.org + Bugs and patches are welcome, please kindly help us and send to the following linux-erofs mailing list: diff --git a/MAINTAINERS b/MAINTAINERS index 97f51d5ec1cf..cf39d16ad22a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7855,6 +7855,7 @@ R: Yue Hu R: Jeffle Xu L: linux-erofs@lists.ozlabs.org S: Maintained +W: https://erofs.docs.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git F: Documentation/ABI/testing/sysfs-fs-erofs F: Documentation/filesystems/erofs.rst diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig index e540648dedc2..1d318f85232d 100644 --- a/fs/erofs/Kconfig +++ b/fs/erofs/Kconfig @@ -21,7 +21,7 @@ config EROFS_FS performance under extremely memory pressure without extra cost. See the documentation at - for more details. + and the web pages at for more details. If unsure, say N. From 27b13e209ddca5979847a1b57890e0372c1edcee Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 17 Nov 2023 10:35:22 +0800 Subject: [PATCH 036/234] blk-throttle: fix lockdep warning of "cgroup_mutex or RCU read lock required!" Inside blkg_for_each_descendant_pre(), both css_for_each_descendant_pre() and blkg_lookup() requires RCU read lock, and either cgroup_assert_mutex_or_rcu_locked() or rcu_read_lock_held() is called. Fix the warning by adding rcu read lock. Reported-by: Changhui Zhong Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20231117023527.3188627-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-throttle.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 13e4377a8b28..16f5766620a4 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1320,6 +1320,7 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global) tg_bps_limit(tg, READ), tg_bps_limit(tg, WRITE), tg_iops_limit(tg, READ), tg_iops_limit(tg, WRITE)); + rcu_read_lock(); /* * Update has_rules[] flags for the updated tg's subtree. A tg is * considered to have rules if either the tg itself or any of its @@ -1347,6 +1348,7 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global) this_tg->latency_target = max(this_tg->latency_target, parent_tg->latency_target); } + rcu_read_unlock(); /* * We're already holding queue_lock and know @tg is valid. Let's From 35a99d6557cacbc177314735342f77a2dda41872 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 17 Nov 2023 10:35:23 +0800 Subject: [PATCH 037/234] blk-cgroup: avoid to warn !rcu_read_lock_held() in blkg_lookup() So far, all callers either holds spin lock or rcu read explicitly, and most of the caller has added WARN_ON_ONCE(!rcu_read_lock_held()) or lockdep_assert_held(&disk->queue->queue_lock). Remove WARN_ON_ONCE(!rcu_read_lock_held()) from blkg_lookup() for killing the false positive warning from blkg_conf_prep(). Reported-by: Changhui Zhong Fixes: 83462a6c971c ("blkcg: Drop unnecessary RCU read [un]locks from blkg_conf_prep/finish()") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20231117023527.3188627-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-cgroup.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 624c03c8fe64..fd482439afbc 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -249,8 +249,6 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, { struct blkcg_gq *blkg; - WARN_ON_ONCE(!rcu_read_lock_held()); - if (blkcg == &blkcg_root) return q->root_blkg; From e63a57303599b17290cd8bc48e6f20b24289a8bc Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 17 Nov 2023 10:35:24 +0800 Subject: [PATCH 038/234] blk-cgroup: bypass blkcg_deactivate_policy after destroying blkcg_deactivate_policy() can be called after blkg_destroy_all() returns, and it isn't necessary since blkg_destroy_all has covered policy deactivation. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20231117023527.3188627-4-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 4a42ea2972ad..4b48c2c44098 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -577,6 +577,7 @@ static void blkg_destroy_all(struct gendisk *disk) struct request_queue *q = disk->queue; struct blkcg_gq *blkg, *n; int count = BLKG_DESTROY_BATCH_SIZE; + int i; restart: spin_lock_irq(&q->queue_lock); @@ -602,6 +603,18 @@ restart: } } + /* + * Mark policy deactivated since policy offline has been done, and + * the free is scheduled, so future blkcg_deactivate_policy() can + * be bypassed + */ + for (i = 0; i < BLKCG_MAX_POLS; i++) { + struct blkcg_policy *pol = blkcg_policy[i]; + + if (pol) + __clear_bit(pol->plid, q->blkcg_pols); + } + q->root_blkg = NULL; spin_unlock_irq(&q->queue_lock); } From 6965809e526917b73c8f9178173184dcf13cec4b Mon Sep 17 00:00:00 2001 From: Xuxin Xiong Date: Tue, 14 Nov 2023 12:42:05 +0800 Subject: [PATCH 039/234] drm/panel: auo,b101uan08.3: Fine tune the panel power sequence For "auo,b101uan08.3" this panel, it is stipulated in the panel spec that MIPI needs to keep the LP11 state before the lcm_reset pin is pulled high. Fixes: 56ad624b4cb5 ("drm/panel: support for auo, b101uan08.3 wuxga dsi video mode panel") Signed-off-by: Xuxin Xiong Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20231114044205.613421-1-xuxinxiong@huaqin.corp-partner.google.com --- drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c index 9323e7b9e384..a287be1aaf70 100644 --- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c +++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c @@ -1709,6 +1709,7 @@ static const struct panel_desc auo_b101uan08_3_desc = { .mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE | MIPI_DSI_MODE_LPM, .init_cmds = auo_b101uan08_3_init_cmd, + .lp11_before_reset = true, }; static const struct drm_display_mode boe_tv105wum_nw0_default_mode = { From 56466f653cb59a8f46e991ad1e285f43afdca7d4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 13 Oct 2023 11:25:15 +0300 Subject: [PATCH 040/234] drm/msm: remove unnecessary NULL check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This NULL check was required when it was added, but we shuffled the code around and now it's not. The inconsistent NULL checking triggers a Smatch warning: drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c:847 mdp5_init() warn: variable dereferenced before check 'mdp5_kms' (see line 782) Fixes: 1f50db2f3e1e ("drm/msm/mdp5: move resource allocation to the _probe function") Signed-off-by: Dan Carpenter Reviewed-by: Uwe Kleine-König Reviewed-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/562559/ Link: https://lore.kernel.org/r/ZSj+6/J6YsoSpLak@kadam Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index a28fbcd09684..40cda31c8ed1 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -844,8 +844,7 @@ static int mdp5_init(struct platform_device *pdev, struct drm_device *dev) return 0; fail: - if (mdp5_kms) - mdp5_destroy(mdp5_kms); + mdp5_destroy(mdp5_kms); return ret; } From 8a924db2d7b5eb69ba08b1a0af46e9f1359a9bdf Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Mon, 2 Oct 2023 08:57:33 -0400 Subject: [PATCH 041/234] fs: Pass AT_GETATTR_NOSEC flag to getattr interface function When vfs_getattr_nosec() calls a filesystem's getattr interface function then the 'nosec' should propagate into this function so that vfs_getattr_nosec() can again be called from the filesystem's gettattr rather than vfs_getattr(). The latter would add unnecessary security checks that the initial vfs_getattr_nosec() call wanted to avoid. Therefore, introduce the getattr flag GETATTR_NOSEC and allow to pass with the new getattr_flags parameter to the getattr interface function. In overlayfs and ecryptfs use this flag to determine which one of the two functions to call. In a recent code change introduced to IMA vfs_getattr_nosec() ended up calling vfs_getattr() in overlayfs, which in turn called security_inode_getattr() on an exiting process that did not have current->fs set anymore, which then caused a kernel NULL pointer dereference. With this change the call to security_inode_getattr() can be avoided, thus avoiding the NULL pointer dereference. Reported-by: Fixes: db1d1e8b9867 ("IMA: use vfs_getattr_nosec to get the i_version") Cc: Alexander Viro Cc: Cc: Miklos Szeredi Cc: Amir Goldstein Cc: Tyler Hicks Cc: Mimi Zohar Suggested-by: Christian Brauner Co-developed-by: Amir Goldstein Signed-off-by: Stefan Berger Link: https://lore.kernel.org/r/20231002125733.1251467-1-stefanb@linux.vnet.ibm.com Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/ecryptfs/inode.c | 12 ++++++++++-- fs/overlayfs/inode.c | 10 +++++----- fs/overlayfs/overlayfs.h | 8 ++++++++ fs/stat.c | 6 +++++- include/uapi/linux/fcntl.h | 3 +++ 5 files changed, 31 insertions(+), 8 deletions(-) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index a25dd3d20008..b0e8774c435a 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -998,6 +998,14 @@ static int ecryptfs_getattr_link(struct mnt_idmap *idmap, return rc; } +static int ecryptfs_do_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) +{ + if (flags & AT_GETATTR_NOSEC) + return vfs_getattr_nosec(path, stat, request_mask, flags); + return vfs_getattr(path, stat, request_mask, flags); +} + static int ecryptfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) @@ -1006,8 +1014,8 @@ static int ecryptfs_getattr(struct mnt_idmap *idmap, struct kstat lower_stat; int rc; - rc = vfs_getattr(ecryptfs_dentry_to_lower_path(dentry), &lower_stat, - request_mask, flags); + rc = ecryptfs_do_getattr(ecryptfs_dentry_to_lower_path(dentry), + &lower_stat, request_mask, flags); if (!rc) { fsstack_copy_attr_all(d_inode(dentry), ecryptfs_inode_to_lower(d_inode(dentry))); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 345b8f161ca4..c63b31a460be 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -171,7 +171,7 @@ int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, type = ovl_path_real(dentry, &realpath); old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_getattr(&realpath, stat, request_mask, flags); + err = ovl_do_getattr(&realpath, stat, request_mask, flags); if (err) goto out; @@ -196,8 +196,8 @@ int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, (!is_dir ? STATX_NLINK : 0); ovl_path_lower(dentry, &realpath); - err = vfs_getattr(&realpath, &lowerstat, - lowermask, flags); + err = ovl_do_getattr(&realpath, &lowerstat, lowermask, + flags); if (err) goto out; @@ -249,8 +249,8 @@ int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, ovl_path_lowerdata(dentry, &realpath); if (realpath.dentry) { - err = vfs_getattr(&realpath, &lowerdatastat, - lowermask, flags); + err = ovl_do_getattr(&realpath, &lowerdatastat, + lowermask, flags); if (err) goto out; } else { diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index ca88b2636a57..05c3dd597fa8 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -408,6 +408,14 @@ static inline bool ovl_open_flags_need_copy_up(int flags) return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)); } +static inline int ovl_do_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) +{ + if (flags & AT_GETATTR_NOSEC) + return vfs_getattr_nosec(path, stat, request_mask, flags); + return vfs_getattr(path, stat, request_mask, flags); +} + /* util.c */ int ovl_get_write_access(struct dentry *dentry); void ovl_put_write_access(struct dentry *dentry); diff --git a/fs/stat.c b/fs/stat.c index 24bb0209e459..f721d26ec3f7 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -133,7 +133,8 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, idmap = mnt_idmap(path->mnt); if (inode->i_op->getattr) return inode->i_op->getattr(idmap, path, stat, - request_mask, query_flags); + request_mask, + query_flags | AT_GETATTR_NOSEC); generic_fillattr(idmap, request_mask, inode, stat); return 0; @@ -166,6 +167,9 @@ int vfs_getattr(const struct path *path, struct kstat *stat, { int retval; + if (WARN_ON_ONCE(query_flags & AT_GETATTR_NOSEC)) + return -EPERM; + retval = security_inode_getattr(path); if (retval) return retval; diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 6c80f96049bd..282e90aeb163 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -116,5 +116,8 @@ #define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to compare object identity and may not be usable to open_by_handle_at(2) */ +#if defined(__KERNEL__) +#define AT_GETATTR_NOSEC 0x80000000 +#endif #endif /* _UAPI_LINUX_FCNTL_H */ From fe2c34bab6d46469ad3095955dc37e984dc24e38 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 17 Nov 2023 13:38:46 -0800 Subject: [PATCH 042/234] iov_iter: fix copy_page_to_iter_nofault() The recent conversion to inline functions made two mistakes: 1. It tries to copy the full amount requested (bytes), not just what's available in the kmap'd page (n). 2. It's not applying the offset in the first page. Note that copy_page_to_iter_nofault() is only used by /proc/kcore. This was detected by drgn's test suite. Fixes: f1982740f5e7 ("iov_iter: Convert iterate*() to inline funcs") Signed-off-by: Omar Sandoval Link: https://lore.kernel.org/r/c1616e06b5248013cbbb1881bb4fef85a7a69ccb.1700257019.git.osandov@fb.com Acked-by: David Howells Signed-off-by: Christian Brauner --- lib/iov_iter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index de7d11cf4c63..8ff6824a1005 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -409,7 +409,7 @@ size_t copy_page_to_iter_nofault(struct page *page, unsigned offset, size_t byte void *kaddr = kmap_local_page(page); size_t n = min(bytes, (size_t)PAGE_SIZE - offset); - n = iterate_and_advance(i, bytes, kaddr, + n = iterate_and_advance(i, n, kaddr + offset, copy_to_user_iter_nofault, memcpy_to_iter); kunmap_local(kaddr); From 06fc41b09cfbc02977acd9189473593a37d82d9b Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 9 Oct 2023 00:33:15 +0200 Subject: [PATCH 043/234] drm/panel: simple: Fix Innolux G101ICE-L01 bus flags Add missing .bus_flags = DRM_BUS_FLAG_DE_HIGH to this panel description, ones which match both the datasheet and the panel display_timing flags . Fixes: 1e29b840af9f ("drm/panel: simple: Add Innolux G101ICE-L01 panel") Signed-off-by: Marek Vasut Reviewed-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20231008223315.279215-1-marex@denx.de --- drivers/gpu/drm/panel/panel-simple.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 6cd32b909087..086750f99d27 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -2402,6 +2402,7 @@ static const struct panel_desc innolux_g101ice_l01 = { .disable = 200, }, .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, .connector_type = DRM_MODE_CONNECTOR_LVDS, }; From 3f9a91b6c00e655d27bd785dcda1742dbdc31bda Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 9 Oct 2023 00:32:56 +0200 Subject: [PATCH 044/234] drm/panel: simple: Fix Innolux G101ICE-L01 timings The Innolux G101ICE-L01 datasheet [1] page 17 table 6.1 INPUT SIGNAL TIMING SPECIFICATIONS indicates that maximum vertical blanking time is 40 lines. Currently the driver uses 29 lines. Fix it, and since this panel is a DE panel, adjust the timings to make them less hostile to controllers which cannot do 1 px HSA/VSA, distribute the delays evenly between all three parts. [1] https://www.data-modul.com/sites/default/files/products/G101ICE-L01-C2-specification-12042389.pdf Fixes: 1e29b840af9f ("drm/panel: simple: Add Innolux G101ICE-L01 panel") Signed-off-by: Marek Vasut Reviewed-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20231008223256.279196-1-marex@denx.de --- drivers/gpu/drm/panel/panel-simple.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 086750f99d27..9367a4572dcf 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -2379,13 +2379,13 @@ static const struct panel_desc innolux_g070y2_t02 = { static const struct display_timing innolux_g101ice_l01_timing = { .pixelclock = { 60400000, 71100000, 74700000 }, .hactive = { 1280, 1280, 1280 }, - .hfront_porch = { 41, 80, 100 }, - .hback_porch = { 40, 79, 99 }, - .hsync_len = { 1, 1, 1 }, + .hfront_porch = { 30, 60, 70 }, + .hback_porch = { 30, 60, 70 }, + .hsync_len = { 22, 40, 60 }, .vactive = { 800, 800, 800 }, - .vfront_porch = { 5, 11, 14 }, - .vback_porch = { 4, 11, 14 }, - .vsync_len = { 1, 1, 1 }, + .vfront_porch = { 3, 8, 14 }, + .vback_porch = { 3, 8, 14 }, + .vsync_len = { 4, 7, 12 }, .flags = DISPLAY_FLAGS_DE_HIGH, }; From 8ba2c459668cfe2aaacc5ebcd35b4b9ef8643013 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Fri, 17 Nov 2023 18:11:08 +0800 Subject: [PATCH 045/234] net: wangxun: fix kernel panic due to null pointer When the device uses a custom subsystem vendor ID, the function wx_sw_init() returns before the memory of 'wx->mac_table' is allocated. The null pointer will causes the kernel panic. Fixes: 79625f45ca73 ("net: wangxun: Move MAC address handling to libwx") Signed-off-by: Jiawen Wu Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/wangxun/libwx/wx_hw.c | 8 +++++--- drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 4 +--- drivers/net/ethernet/wangxun/txgbe/txgbe_main.c | 4 +--- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index a3c5de9d547a..533e912af089 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -1769,10 +1769,12 @@ int wx_sw_init(struct wx *wx) wx->subsystem_device_id = pdev->subsystem_device; } else { err = wx_flash_read_dword(wx, 0xfffdc, &ssid); - if (!err) - wx->subsystem_device_id = swab16((u16)ssid); + if (err < 0) { + wx_err(wx, "read of internal subsystem device id failed\n"); + return err; + } - return err; + wx->subsystem_device_id = swab16((u16)ssid); } wx->mac_table = kcalloc(wx->mac.num_rar_entries, diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index 3d43f808c86b..8db804543e66 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -121,10 +121,8 @@ static int ngbe_sw_init(struct wx *wx) /* PCI config space info */ err = wx_sw_init(wx); - if (err < 0) { - wx_err(wx, "read of internal subsystem device id failed\n"); + if (err < 0) return err; - } /* mac type, phy type , oem type */ ngbe_init_type_code(wx); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index 70f0b5c01dac..526250102db2 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -364,10 +364,8 @@ static int txgbe_sw_init(struct wx *wx) /* PCI config space info */ err = wx_sw_init(wx); - if (err < 0) { - wx_err(wx, "read of internal subsystem device id failed\n"); + if (err < 0) return err; - } txgbe_init_type_code(wx); From 93da8d75a66568ba4bb5b14ad2833acd7304cd02 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Nov 2023 14:17:33 +0000 Subject: [PATCH 046/234] wireguard: use DEV_STATS_INC() wg_xmit() can be called concurrently, KCSAN reported [1] some device stats updates can be lost. Use DEV_STATS_INC() for this unlikely case. [1] BUG: KCSAN: data-race in wg_xmit / wg_xmit read-write to 0xffff888104239160 of 8 bytes by task 1375 on cpu 0: wg_xmit+0x60f/0x680 drivers/net/wireguard/device.c:231 __netdev_start_xmit include/linux/netdevice.h:4918 [inline] netdev_start_xmit include/linux/netdevice.h:4932 [inline] xmit_one net/core/dev.c:3543 [inline] dev_hard_start_xmit+0x11b/0x3f0 net/core/dev.c:3559 ... read-write to 0xffff888104239160 of 8 bytes by task 1378 on cpu 1: wg_xmit+0x60f/0x680 drivers/net/wireguard/device.c:231 __netdev_start_xmit include/linux/netdevice.h:4918 [inline] netdev_start_xmit include/linux/netdevice.h:4932 [inline] xmit_one net/core/dev.c:3543 [inline] dev_hard_start_xmit+0x11b/0x3f0 net/core/dev.c:3559 ... v2: also change wg_packet_consume_data_done() (Hangbin Liu) and wg_packet_purge_staged_packets() Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Jason A. Donenfeld Cc: Hangbin Liu Signed-off-by: Jason A. Donenfeld Reviewed-by: Hangbin Liu Signed-off-by: David S. Miller --- drivers/net/wireguard/device.c | 4 ++-- drivers/net/wireguard/receive.c | 12 ++++++------ drivers/net/wireguard/send.c | 3 ++- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index 258dcc103921..deb9636b0ecf 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -210,7 +210,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) */ while (skb_queue_len(&peer->staged_packet_queue) > MAX_STAGED_PACKETS) { dev_kfree_skb(__skb_dequeue(&peer->staged_packet_queue)); - ++dev->stats.tx_dropped; + DEV_STATS_INC(dev, tx_dropped); } skb_queue_splice_tail(&packets, &peer->staged_packet_queue); spin_unlock_bh(&peer->staged_packet_queue.lock); @@ -228,7 +228,7 @@ err_icmp: else if (skb->protocol == htons(ETH_P_IPV6)) icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); err: - ++dev->stats.tx_errors; + DEV_STATS_INC(dev, tx_errors); kfree_skb(skb); return ret; } diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index 0b3f0c843550..a176653c8861 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -416,20 +416,20 @@ dishonest_packet_peer: net_dbg_skb_ratelimited("%s: Packet has unallowed src IP (%pISc) from peer %llu (%pISpfsc)\n", dev->name, skb, peer->internal_id, &peer->endpoint.addr); - ++dev->stats.rx_errors; - ++dev->stats.rx_frame_errors; + DEV_STATS_INC(dev, rx_errors); + DEV_STATS_INC(dev, rx_frame_errors); goto packet_processed; dishonest_packet_type: net_dbg_ratelimited("%s: Packet is neither ipv4 nor ipv6 from peer %llu (%pISpfsc)\n", dev->name, peer->internal_id, &peer->endpoint.addr); - ++dev->stats.rx_errors; - ++dev->stats.rx_frame_errors; + DEV_STATS_INC(dev, rx_errors); + DEV_STATS_INC(dev, rx_frame_errors); goto packet_processed; dishonest_packet_size: net_dbg_ratelimited("%s: Packet has incorrect size from peer %llu (%pISpfsc)\n", dev->name, peer->internal_id, &peer->endpoint.addr); - ++dev->stats.rx_errors; - ++dev->stats.rx_length_errors; + DEV_STATS_INC(dev, rx_errors); + DEV_STATS_INC(dev, rx_length_errors); goto packet_processed; packet_processed: dev_kfree_skb(skb); diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c index 95c853b59e1d..0d48e0f4a1ba 100644 --- a/drivers/net/wireguard/send.c +++ b/drivers/net/wireguard/send.c @@ -333,7 +333,8 @@ err: void wg_packet_purge_staged_packets(struct wg_peer *peer) { spin_lock_bh(&peer->staged_packet_queue.lock); - peer->device->dev->stats.tx_dropped += peer->staged_packet_queue.qlen; + DEV_STATS_ADD(peer->device->dev, tx_dropped, + peer->staged_packet_queue.qlen); __skb_queue_purge(&peer->staged_packet_queue); spin_unlock_bh(&peer->staged_packet_queue.lock); } From 5f228d7c8a539714c1e9b7e7534f76bb7979f268 Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Fri, 17 Nov 2023 16:10:18 +0530 Subject: [PATCH 047/234] octeontx2-pf: Fix memory leak during interface down During 'ifconfig down' one RSS memory was not getting freed. This patch fixes the same. Fixes: 81a4362016e7 ("octeontx2-pf: Add RSS multi group support") Signed-off-by: Suman Ghosh Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 91b99fd70361..ba95ac913274 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1934,6 +1934,8 @@ int otx2_stop(struct net_device *netdev) /* Clear RSS enable flag */ rss = &pf->hw.rss_info; rss->enable = false; + if (!netif_is_rxfh_configured(netdev)) + kfree(rss->rss_ctx[DEFAULT_RSS_CONTEXT_GROUP]); /* Cleanup Queue IRQ */ vec = pci_irq_vector(pf->pdev, From 938dbead34cd139c50c5d51cc58e18ef2f1c3d2c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 18 Nov 2023 19:30:06 -0800 Subject: [PATCH 048/234] net: fill in MODULE_DESCRIPTION()s for SOCK_DIAG modules W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to all the sock diag modules in one fell swoop. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/ipv4/inet_diag.c | 1 + net/ipv4/raw_diag.c | 1 + net/ipv4/tcp_diag.c | 1 + net/ipv4/udp_diag.c | 1 + net/mptcp/mptcp_diag.c | 1 + net/packet/diag.c | 1 + net/sctp/diag.c | 1 + net/smc/smc_diag.c | 1 + net/tipc/diag.c | 1 + net/unix/diag.c | 1 + net/vmw_vsock/diag.c | 1 + net/xdp/xsk_diag.c | 1 + 12 files changed, 12 insertions(+) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index f01aee832aab..7d0e7aaa71e0 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -1481,5 +1481,6 @@ static void __exit inet_diag_exit(void) module_init(inet_diag_init); module_exit(inet_diag_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("INET/INET6: socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2 /* AF_INET */); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10 /* AF_INET6 */); diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index 63a40e4b678f..fe2140c8375c 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -257,5 +257,6 @@ static void __exit raw_diag_exit(void) module_init(raw_diag_init); module_exit(raw_diag_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("RAW socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-255 /* AF_INET - IPPROTO_RAW */); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10-255 /* AF_INET6 - IPPROTO_RAW */); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 01b50fa79189..4cbe4b44425a 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -247,4 +247,5 @@ static void __exit tcp_diag_exit(void) module_init(tcp_diag_init); module_exit(tcp_diag_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("TCP socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-6 /* AF_INET - IPPROTO_TCP */); diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index de3f2d31f510..dc41a22ee80e 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -296,5 +296,6 @@ static void __exit udp_diag_exit(void) module_init(udp_diag_init); module_exit(udp_diag_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("UDP socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-17 /* AF_INET - IPPROTO_UDP */); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-136 /* AF_INET - IPPROTO_UDPLITE */); diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c index 8df1bdb647e2..5409c2ea3f57 100644 --- a/net/mptcp/mptcp_diag.c +++ b/net/mptcp/mptcp_diag.c @@ -245,4 +245,5 @@ static void __exit mptcp_diag_exit(void) module_init(mptcp_diag_init); module_exit(mptcp_diag_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("MPTCP socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-262 /* AF_INET - IPPROTO_MPTCP */); diff --git a/net/packet/diag.c b/net/packet/diag.c index f6b200cb3c06..9a7980e3309d 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -262,4 +262,5 @@ static void __exit packet_diag_exit(void) module_init(packet_diag_init); module_exit(packet_diag_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("PACKET socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 17 /* AF_PACKET */); diff --git a/net/sctp/diag.c b/net/sctp/diag.c index c3d6b92dd386..eb05131ff1dd 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -527,4 +527,5 @@ static void __exit sctp_diag_exit(void) module_init(sctp_diag_init); module_exit(sctp_diag_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SCTP socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-132); diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 7ff2152971a5..a584613aca12 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -268,5 +268,6 @@ static void __exit smc_diag_exit(void) module_init(smc_diag_init); module_exit(smc_diag_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SMC socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 43 /* AF_SMC */); MODULE_ALIAS_GENL_FAMILY(SMCR_GENL_FAMILY_NAME); diff --git a/net/tipc/diag.c b/net/tipc/diag.c index 73137f4aeb68..18733451c9e0 100644 --- a/net/tipc/diag.c +++ b/net/tipc/diag.c @@ -113,4 +113,5 @@ module_init(tipc_diag_init); module_exit(tipc_diag_exit); MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("TIPC socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_TIPC); diff --git a/net/unix/diag.c b/net/unix/diag.c index 616b55c5b890..bec09a3a1d44 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -339,4 +339,5 @@ static void __exit unix_diag_exit(void) module_init(unix_diag_init); module_exit(unix_diag_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("UNIX socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 1 /* AF_LOCAL */); diff --git a/net/vmw_vsock/diag.c b/net/vmw_vsock/diag.c index a2823b1c5e28..2e29994f92ff 100644 --- a/net/vmw_vsock/diag.c +++ b/net/vmw_vsock/diag.c @@ -174,5 +174,6 @@ static void __exit vsock_diag_exit(void) module_init(vsock_diag_init); module_exit(vsock_diag_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("VMware Virtual Sockets monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 40 /* AF_VSOCK */); diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c index 22b36c8143cf..9f8955367275 100644 --- a/net/xdp/xsk_diag.c +++ b/net/xdp/xsk_diag.c @@ -211,4 +211,5 @@ static void __exit xsk_diag_exit(void) module_init(xsk_diag_init); module_exit(xsk_diag_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("XDP socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_XDP); From a6925165ea82b7765269ddd8dcad57c731aa00de Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Tue, 31 Oct 2023 04:00:07 +0000 Subject: [PATCH 049/234] ata: pata_isapnp: Add missing error check for devm_ioport_map() Add missing error return check for devm_ioport_map() and return the error if this function call fails. Fixes: 0d5ff566779f ("libata: convert to iomap") Signed-off-by: Chen Ni Reviewed-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- drivers/ata/pata_isapnp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ata/pata_isapnp.c b/drivers/ata/pata_isapnp.c index 25a63d043c8e..0f77e0424066 100644 --- a/drivers/ata/pata_isapnp.c +++ b/drivers/ata/pata_isapnp.c @@ -82,6 +82,9 @@ static int isapnp_init_one(struct pnp_dev *idev, const struct pnp_device_id *dev if (pnp_port_valid(idev, 1)) { ctl_addr = devm_ioport_map(&idev->dev, pnp_port_start(idev, 1), 1); + if (!ctl_addr) + return -ENOMEM; + ap->ioaddr.altstatus_addr = ctl_addr; ap->ioaddr.ctl_addr = ctl_addr; ap->ops = &isapnp_port_ops; From 45b478951b2ba5aea70b2850c49c1aa83aedd0d2 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 17 Nov 2023 15:56:30 -0800 Subject: [PATCH 050/234] md: fix bi_status reporting in md_end_clone_io md_end_clone_io() may overwrite error status in orig_bio->bi_status with BLK_STS_OK. This could happen when orig_bio has BIO_CHAIN (split by md_submit_bio => bio_split_to_limits, for example). As a result, upper layer may miss error reported from md (or the device) and consider the failed IO was successful. Fix this by only update orig_bio->bi_status when current bio reports error and orig_bio is BLK_STS_OK. This is the same behavior as __bio_chain_endio(). Fixes: 10764815ff47 ("md: add io accounting for raid0 and raid5") Cc: stable@vger.kernel.org # v5.14+ Reported-by: Bhanu Victor DiCara <00bvd0+linux@gmail.com> Closes: https://lore.kernel.org/regressions/5727380.DvuYhMxLoT@bvd0/ Signed-off-by: Song Liu Tested-by: Xiao Ni Reviewed-by: Yu Kuai Acked-by: Guoqing Jiang --- drivers/md/md.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 4ee4593c874a..c94373d64f2c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8666,7 +8666,8 @@ static void md_end_clone_io(struct bio *bio) struct bio *orig_bio = md_io_clone->orig_bio; struct mddev *mddev = md_io_clone->mddev; - orig_bio->bi_status = bio->bi_status; + if (bio->bi_status && !orig_bio->bi_status) + orig_bio->bi_status = bio->bi_status; if (md_io_clone->start_time) bio_end_io_acct(orig_bio, md_io_clone->start_time); From 018903e1cec3421a6198589fabd30682eb277904 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 30 Oct 2023 01:13:08 +0200 Subject: [PATCH 051/234] drm/i915/dp_mst: Fix race between connector registration and setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After drm_connector_init() is called the connector is visible to the rest of the kernel via the drm_mode_config::connector_list. Make sure that the DSC AUX device and capabilities are setup by that time. Another race condition is adding the connector to the connector list before drm_connector_helper_add() sets the connector helper functions. That's an unrelated issue, for which the fix is for a follow-up. One solution would be adding the connector to the connector list only during its registration in drm_connector_register(). Cc: Stanislav Lisovskiy Cc: Ville Syrjälä Fixes: 808b43fa7e56 ("drm/i915/dp_mst: Set connector DSC capabilities and decompression AUX") Reviewed-by: Stanislav Lisovskiy Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20231030155843.2251023-2-imre.deak@intel.com (cherry picked from commit 560ea72c76eb6d0c59f77580414e64cc09f1093d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 7b4628f4f124..851b312bd844 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -1161,6 +1161,14 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo intel_connector->port = port; drm_dp_mst_get_port_malloc(port); + /* + * TODO: set the AUX for the actual MST port decompressing the stream. + * At the moment the driver only supports enabling this globally in the + * first downstream MST branch, via intel_dp's (root port) AUX. + */ + intel_connector->dp.dsc_decompression_aux = &intel_dp->aux; + intel_dp_mst_read_decompression_port_dsc_caps(intel_dp, intel_connector); + connector = &intel_connector->base; ret = drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs, DRM_MODE_CONNECTOR_DisplayPort); @@ -1172,14 +1180,6 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo drm_connector_helper_add(connector, &intel_dp_mst_connector_helper_funcs); - /* - * TODO: set the AUX for the actual MST port decompressing the stream. - * At the moment the driver only supports enabling this globally in the - * first downstream MST branch, via intel_dp's (root port) AUX. - */ - intel_connector->dp.dsc_decompression_aux = &intel_dp->aux; - intel_dp_mst_read_decompression_port_dsc_caps(intel_dp, intel_connector); - for_each_pipe(dev_priv, pipe) { struct drm_encoder *enc = &intel_dp->mst_encoders[pipe]->base.base; From 0561794b6b642b84b879bf97061c4b4fa692839e Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Wed, 15 Nov 2023 11:54:03 +0100 Subject: [PATCH 052/234] drm/i915: do not clean GT table on error path The only task of intel_gt_release_all is to zero gt table. Calling it on error path prevents intel_gt_driver_late_release_all (called from i915_driver_late_release) to cleanup GTs, causing leakage. After i915_driver_late_release GT array is not used anymore so it does not need cleaning at all. Sample leak report: BUG i915_request (...): Objects remaining in i915_request on __kmem_cache_shutdown() ... Object 0xffff888113420040 @offset=64 Allocated in __i915_request_create+0x75/0x610 [i915] age=18339 cpu=1 pid=1454 kmem_cache_alloc+0x25b/0x270 __i915_request_create+0x75/0x610 [i915] i915_request_create+0x109/0x290 [i915] __engines_record_defaults+0xca/0x440 [i915] intel_gt_init+0x275/0x430 [i915] i915_gem_init+0x135/0x2c0 [i915] i915_driver_probe+0x8d1/0xdc0 [i915] v2: removed whole intel_gt_release_all Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8489 Fixes: bec68cc9ea42 ("drm/i915: Prepare for multiple GTs") Signed-off-by: Andrzej Hajda Reviewed-by: Tvrtko Ursulin Reviewed-by: Nirmoy Das Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231115-dont_clean_gt_on_error_path-v2-1-54250125470a@intel.com (cherry picked from commit e899505533852bf1da133f2f4c9a9655ff77f7e5) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_gt.c | 11 ----------- drivers/gpu/drm/i915/i915_driver.c | 4 +--- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index ed32bf5b1546..ba1186fc524f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -982,8 +982,6 @@ int intel_gt_probe_all(struct drm_i915_private *i915) err: i915_probe_error(i915, "Failed to initialize %s! (%d)\n", gtdef->name, ret); - intel_gt_release_all(i915); - return ret; } @@ -1002,15 +1000,6 @@ int intel_gt_tiles_init(struct drm_i915_private *i915) return 0; } -void intel_gt_release_all(struct drm_i915_private *i915) -{ - struct intel_gt *gt; - unsigned int id; - - for_each_gt(gt, i915, id) - i915->gt[id] = NULL; -} - void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p) { diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 8a0e2c745e1f..802de2c6decb 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -782,7 +782,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ret = i915_driver_mmio_probe(i915); if (ret < 0) - goto out_tiles_cleanup; + goto out_runtime_pm_put; ret = i915_driver_hw_probe(i915); if (ret < 0) @@ -842,8 +842,6 @@ out_cleanup_hw: i915_ggtt_driver_late_release(i915); out_cleanup_mmio: i915_driver_mmio_release(i915); -out_tiles_cleanup: - intel_gt_release_all(i915); out_runtime_pm_put: enable_rpm_wakeref_asserts(&i915->runtime_pm); i915_driver_late_release(i915); From 66917f85db6002ed09cd24186258892fcfca64b6 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 20 Nov 2023 06:53:19 +0800 Subject: [PATCH 053/234] autofs: add: new_inode check in autofs_fill_super() Add missing NULL check of root_inode in autofs_fill_super(). While we are at it simplify the logic by taking advantage of the VFS cleanup procedures and get rid of the goto error handling, as suggested by Al Viro. Signed-off-by: Ian Kent Link: https://lore.kernel.org/r/20231119225319.331156-1-raven@themaw.net Reviewed-by: Bill O'Donnell Cc: Al Viro Cc: Christian Brauner Cc: Bill O'Donnell Reported-by: Signed-off-by: Christian Brauner --- fs/autofs/inode.c | 56 ++++++++++++++++++----------------------------- 1 file changed, 21 insertions(+), 35 deletions(-) diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index a5083d447a62..1f5db6863663 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -309,9 +309,7 @@ static int autofs_fill_super(struct super_block *s, struct fs_context *fc) struct autofs_fs_context *ctx = fc->fs_private; struct autofs_sb_info *sbi = s->s_fs_info; struct inode *root_inode; - struct dentry *root; struct autofs_info *ino; - int ret = -ENOMEM; pr_debug("starting up, sbi = %p\n", sbi); @@ -328,56 +326,44 @@ static int autofs_fill_super(struct super_block *s, struct fs_context *fc) */ ino = autofs_new_ino(sbi); if (!ino) - goto fail; + return -ENOMEM; root_inode = autofs_get_inode(s, S_IFDIR | 0755); + if (!root_inode) + return -ENOMEM; + root_inode->i_uid = ctx->uid; root_inode->i_gid = ctx->gid; + root_inode->i_fop = &autofs_root_operations; + root_inode->i_op = &autofs_dir_inode_operations; - root = d_make_root(root_inode); - if (!root) - goto fail_ino; - - root->d_fsdata = ino; + s->s_root = d_make_root(root_inode); + if (unlikely(!s->s_root)) { + autofs_free_ino(ino); + return -ENOMEM; + } + s->s_root->d_fsdata = ino; if (ctx->pgrp_set) { sbi->oz_pgrp = find_get_pid(ctx->pgrp); - if (!sbi->oz_pgrp) { - ret = invalf(fc, "Could not find process group %d", - ctx->pgrp); - goto fail_dput; - } - } else { + if (!sbi->oz_pgrp) + return invalf(fc, "Could not find process group %d", + ctx->pgrp); + } else sbi->oz_pgrp = get_task_pid(current, PIDTYPE_PGID); - } if (autofs_type_trigger(sbi->type)) - __managed_dentry_set_managed(root); - - root_inode->i_fop = &autofs_root_operations; - root_inode->i_op = &autofs_dir_inode_operations; + /* s->s_root won't be contended so there's little to + * be gained by not taking the d_lock when setting + * d_flags, even when a lot mounts are being done. + */ + managed_dentry_set_managed(s->s_root); pr_debug("pipe fd = %d, pgrp = %u\n", sbi->pipefd, pid_nr(sbi->oz_pgrp)); sbi->flags &= ~AUTOFS_SBI_CATATONIC; - - /* - * Success! Install the root dentry now to indicate completion. - */ - s->s_root = root; return 0; - - /* - * Failure ... clean up. - */ -fail_dput: - dput(root); - goto fail; -fail_ino: - autofs_free_ino(ino); -fail: - return ret; } /* From 762321dab9a72760bf9aec48362f932717c9424d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Oct 2023 16:10:17 +0200 Subject: [PATCH 054/234] filemap: add a per-mapping stable writes flag folio_wait_stable waits for writeback to finish before modifying the contents of a folio again, e.g. to support check summing of the data in the block integrity code. Currently this behavior is controlled by the SB_I_STABLE_WRITES flag on the super_block, which means it is uniform for the entire file system. This is wrong for the block device pseudofs which is shared by all block devices, or file systems that can use multiple devices like XFS witht the RT subvolume or btrfs (although btrfs currently reimplements folio_wait_stable anyway). Add a per-address_space AS_STABLE_WRITES flag to control the behavior in a more fine grained way. The existing SB_I_STABLE_WRITES is kept to initialize AS_STABLE_WRITES to the existing default which covers most cases. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20231025141020.192413-2-hch@lst.de Tested-by: Ilya Dryomov Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- fs/inode.c | 2 ++ include/linux/pagemap.h | 17 +++++++++++++++++ mm/page-writeback.c | 2 +- 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/fs/inode.c b/fs/inode.c index edcd8a61975f..f238d987dec9 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -215,6 +215,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode) lockdep_set_class_and_name(&mapping->invalidate_lock, &sb->s_type->invalidate_lock_key, "mapping.invalidate_lock"); + if (sb->s_iflags & SB_I_STABLE_WRITES) + mapping_set_stable_writes(mapping); inode->i_private = NULL; inode->i_mapping = mapping; INIT_HLIST_HEAD(&inode->i_dentry); /* buggered by rcu freeing */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index bcc1ea44b4e8..06142ff7f9ce 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -204,6 +204,8 @@ enum mapping_flags { AS_NO_WRITEBACK_TAGS = 5, AS_LARGE_FOLIO_SUPPORT = 6, AS_RELEASE_ALWAYS, /* Call ->release_folio(), even if no private data */ + AS_STABLE_WRITES, /* must wait for writeback before modifying + folio contents */ }; /** @@ -289,6 +291,21 @@ static inline void mapping_clear_release_always(struct address_space *mapping) clear_bit(AS_RELEASE_ALWAYS, &mapping->flags); } +static inline bool mapping_stable_writes(const struct address_space *mapping) +{ + return test_bit(AS_STABLE_WRITES, &mapping->flags); +} + +static inline void mapping_set_stable_writes(struct address_space *mapping) +{ + set_bit(AS_STABLE_WRITES, &mapping->flags); +} + +static inline void mapping_clear_stable_writes(struct address_space *mapping) +{ + clear_bit(AS_STABLE_WRITES, &mapping->flags); +} + static inline gfp_t mapping_gfp_mask(struct address_space * mapping) { return mapping->gfp_mask; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 46f2f5d3d183..ee2fd6a6af40 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -3107,7 +3107,7 @@ EXPORT_SYMBOL_GPL(folio_wait_writeback_killable); */ void folio_wait_stable(struct folio *folio) { - if (folio_inode(folio)->i_sb->s_iflags & SB_I_STABLE_WRITES) + if (mapping_stable_writes(folio_mapping(folio))) folio_wait_writeback(folio); } EXPORT_SYMBOL_GPL(folio_wait_stable); From 1898efcdbed32bb1c67269c985a50bab0dbc9493 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Oct 2023 16:10:18 +0200 Subject: [PATCH 055/234] block: update the stable_writes flag in bdev_add Propagate the per-queue stable_write flags into each bdev inode in bdev_add. This makes sure devices that require stable writes have it set for I/O on the block device node as well. Note that this doesn't cover the case of a flag changing on a live device yet. We should handle that as well, but I plan to cover it as part of a more general rework of how changing runtime paramters on block devices works. Fixes: 1cb039f3dc16 ("bdi: replace BDI_CAP_STABLE_WRITES with a queue and a sb flag") Reported-by: Ilya Dryomov Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20231025141020.192413-3-hch@lst.de Tested-by: Ilya Dryomov Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- block/bdev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/bdev.c b/block/bdev.c index e4cfb7adb645..750aec178b6a 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -425,6 +425,8 @@ void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors) void bdev_add(struct block_device *bdev, dev_t dev) { + if (bdev_stable_writes(bdev)) + mapping_set_stable_writes(bdev->bd_inode->i_mapping); bdev->bd_dev = dev; bdev->bd_inode->i_rdev = dev; bdev->bd_inode->i_ino = dev; From c421df0b19430417a04f68919fc3d1943d20ac04 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Oct 2023 16:10:19 +0200 Subject: [PATCH 056/234] xfs: clean up FS_XFLAG_REALTIME handling in xfs_ioctl_setattr_xflags Introduce a local boolean variable if FS_XFLAG_REALTIME to make the checks for it more obvious, and de-densify a few of the conditionals using it to make them more readable while at it. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20231025141020.192413-4-hch@lst.de Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- fs/xfs/xfs_ioctl.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index a82470e027f7..022dc2dc78d2 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1121,23 +1121,25 @@ xfs_ioctl_setattr_xflags( struct fileattr *fa) { struct xfs_mount *mp = ip->i_mount; + bool rtflag = (fa->fsx_xflags & FS_XFLAG_REALTIME); uint64_t i_flags2; - /* Can't change realtime flag if any extents are allocated. */ - if ((ip->i_df.if_nextents || ip->i_delayed_blks) && - XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & FS_XFLAG_REALTIME)) - return -EINVAL; - - /* If realtime flag is set then must have realtime device */ - if (fa->fsx_xflags & FS_XFLAG_REALTIME) { - if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 || - xfs_extlen_to_rtxmod(mp, ip->i_extsize)) + if (rtflag != XFS_IS_REALTIME_INODE(ip)) { + /* Can't change realtime flag if any extents are allocated. */ + if (ip->i_df.if_nextents || ip->i_delayed_blks) return -EINVAL; } - /* Clear reflink if we are actually able to set the rt flag. */ - if ((fa->fsx_xflags & FS_XFLAG_REALTIME) && xfs_is_reflink_inode(ip)) - ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; + if (rtflag) { + /* If realtime flag is set then must have realtime device */ + if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 || + xfs_extlen_to_rtxmod(mp, ip->i_extsize)) + return -EINVAL; + + /* Clear reflink if we are actually able to set the rt flag. */ + if (xfs_is_reflink_inode(ip)) + ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; + } /* diflags2 only valid for v3 inodes. */ i_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags); From 9c04138414c00ae61421f36ada002712c4bac94a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Oct 2023 16:10:20 +0200 Subject: [PATCH 057/234] xfs: respect the stable writes flag on the RT device Update the per-folio stable writes flag dependening on which device an inode resides on. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20231025141020.192413-5-hch@lst.de Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- fs/xfs/xfs_inode.h | 8 ++++++++ fs/xfs/xfs_ioctl.c | 8 ++++++++ fs/xfs/xfs_iops.c | 7 +++++++ 3 files changed, 23 insertions(+) diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 3dc47937da5d..3beb470f1892 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -569,6 +569,14 @@ extern void xfs_setup_inode(struct xfs_inode *ip); extern void xfs_setup_iops(struct xfs_inode *ip); extern void xfs_diflags_to_iflags(struct xfs_inode *ip, bool init); +static inline void xfs_update_stable_writes(struct xfs_inode *ip) +{ + if (bdev_stable_writes(xfs_inode_buftarg(ip)->bt_bdev)) + mapping_set_stable_writes(VFS_I(ip)->i_mapping); + else + mapping_clear_stable_writes(VFS_I(ip)->i_mapping); +} + /* * When setting up a newly allocated inode, we need to call * xfs_finish_inode_setup() once the inode is fully instantiated at diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 022dc2dc78d2..6c3919687ea6 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1150,6 +1150,14 @@ xfs_ioctl_setattr_xflags( ip->i_diflags2 = i_flags2; xfs_diflags_to_iflags(ip, false); + + /* + * Make the stable writes flag match that of the device the inode + * resides on when flipping the RT flag. + */ + if (rtflag != XFS_IS_REALTIME_INODE(ip) && S_ISREG(VFS_I(ip)->i_mode)) + xfs_update_stable_writes(ip); + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); XFS_STATS_INC(mp, xs_ig_attrchg); diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index fdfda4fba12b..a0d77f5f512e 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -1298,6 +1298,13 @@ xfs_setup_inode( gfp_mask = mapping_gfp_mask(inode->i_mapping); mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS))); + /* + * For real-time inodes update the stable write flags to that of the RT + * device instead of the data device. + */ + if (S_ISREG(inode->i_mode) && XFS_IS_REALTIME_INODE(ip)) + xfs_update_stable_writes(ip); + /* * If there is no attribute fork no ACL can exist on this inode, * and it can't have any file capabilities attached to it either. From 796432efab1e372d404e7a71cc6891a53f105051 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 19 Nov 2023 18:56:17 -0500 Subject: [PATCH 058/234] libfs: getdents() should return 0 after reaching EOD The new directory offset helpers don't conform with the convention of getdents() returning no more entries once a directory file descriptor has reached the current end-of-directory. To address this, copy the logic from dcache_readdir() to mark the open directory file descriptor once EOD has been reached. Seeking resets the mark. Reported-by: Tavian Barnes Closes: https://lore.kernel.org/linux-fsdevel/20231113180616.2831430-1-tavianator@tavianator.com/ Fixes: 6faddda69f62 ("libfs: Add directory operations for stable offsets") Signed-off-by: Chuck Lever Link: https://lore.kernel.org/r/170043792492.4628.15646203084646716134.stgit@bazille.1015granger.net Signed-off-by: Christian Brauner --- fs/libfs.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index e9440d55073c..c2aa6fd4795c 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -399,6 +399,8 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence) return -EINVAL; } + /* In this case, ->private_data is protected by f_pos_lock */ + file->private_data = NULL; return vfs_setpos(file, offset, U32_MAX); } @@ -428,7 +430,7 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry) inode->i_ino, fs_umode_to_dtype(inode->i_mode)); } -static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx) +static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) { struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode); XA_STATE(xas, &so_ctx->xa, ctx->pos); @@ -437,7 +439,7 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx) while (true) { dentry = offset_find_next(&xas); if (!dentry) - break; + return ERR_PTR(-ENOENT); if (!offset_dir_emit(ctx, dentry)) { dput(dentry); @@ -447,6 +449,7 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx) dput(dentry); ctx->pos = xas.xa_index + 1; } + return NULL; } /** @@ -479,7 +482,12 @@ static int offset_readdir(struct file *file, struct dir_context *ctx) if (!dir_emit_dots(file, ctx)) return 0; - offset_iterate_dir(d_inode(dir), ctx); + /* In this case, ->private_data is protected by f_pos_lock */ + if (ctx->pos == 2) + file->private_data = NULL; + else if (file->private_data == ERR_PTR(-ENOENT)) + return 0; + file->private_data = offset_iterate_dir(d_inode(dir), ctx); return 0; } From 8479063f1fbee201a8739130e816cc331b675838 Mon Sep 17 00:00:00 2001 From: Charles Mirabile Date: Mon, 20 Nov 2023 05:55:45 -0500 Subject: [PATCH 059/234] io_uring/fs: consider link->flags when getting path for LINKAT In order for `AT_EMPTY_PATH` to work as expected, the fact that the user wants that behavior needs to make it to `getname_flags` or it will return ENOENT. Fixes: cf30da90bc3a ("io_uring: add support for IORING_OP_LINKAT") Cc: Link: https://github.com/axboe/liburing/issues/995 Signed-off-by: Charles Mirabile Link: https://lore.kernel.org/r/20231120105545.1209530-1-cmirabil@redhat.com Signed-off-by: Jens Axboe --- io_uring/fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/fs.c b/io_uring/fs.c index 08e3b175469c..eccea851dd5a 100644 --- a/io_uring/fs.c +++ b/io_uring/fs.c @@ -254,7 +254,7 @@ int io_linkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) newf = u64_to_user_ptr(READ_ONCE(sqe->addr2)); lnk->flags = READ_ONCE(sqe->hardlink_flags); - lnk->oldpath = getname(oldf); + lnk->oldpath = getname_uflags(oldf, lnk->flags); if (IS_ERR(lnk->oldpath)) return PTR_ERR(lnk->oldpath); From bb0a05acd6121ff0e810b44fdc24dbdfaa46b642 Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Thu, 26 Oct 2023 19:14:58 +0000 Subject: [PATCH 060/234] drm/rockchip: vop: Fix color for RGB888/BGR888 format on VOP full Use of DRM_FORMAT_RGB888 and DRM_FORMAT_BGR888 on e.g. RK3288, RK3328 and RK3399 result in wrong colors being displayed. The issue can be observed using modetest: modetest -s @:1920x1080-60@RG24 modetest -s @:1920x1080-60@BG24 Vendor 4.4 kernel apply an inverted rb swap for these formats on VOP full framework (IP version 3.x) compared to VOP little framework (2.x). Fix colors by applying different rb swap for VOP full framework (3.x) and VOP little framework (2.x) similar to vendor 4.4 kernel. Fixes: 85a359f25388 ("drm/rockchip: Add BGR formats to VOP") Signed-off-by: Jonas Karlman Tested-by: Diederik de Haas Reviewed-by: Christopher Obbard Tested-by: Christopher Obbard Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20231026191500.2994225-1-jonas@kwiboo.se --- drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index 066299894d04..a13473b2d54c 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -247,14 +247,22 @@ static inline void vop_cfg_done(struct vop *vop) VOP_REG_SET(vop, common, cfg_done, 1); } -static bool has_rb_swapped(uint32_t format) +static bool has_rb_swapped(uint32_t version, uint32_t format) { switch (format) { case DRM_FORMAT_XBGR8888: case DRM_FORMAT_ABGR8888: - case DRM_FORMAT_BGR888: case DRM_FORMAT_BGR565: return true; + /* + * full framework (IP version 3.x) only need rb swapped for RGB888 and + * little framework (IP version 2.x) only need rb swapped for BGR888, + * check for 3.x to also only rb swap BGR888 for unknown vop version + */ + case DRM_FORMAT_RGB888: + return VOP_MAJOR(version) == 3; + case DRM_FORMAT_BGR888: + return VOP_MAJOR(version) != 3; default: return false; } @@ -1030,7 +1038,7 @@ static void vop_plane_atomic_update(struct drm_plane *plane, VOP_WIN_SET(vop, win, dsp_info, dsp_info); VOP_WIN_SET(vop, win, dsp_st, dsp_st); - rb_swap = has_rb_swapped(fb->format->format); + rb_swap = has_rb_swapped(vop->data->version, fb->format->format); VOP_WIN_SET(vop, win, rb_swap, rb_swap); /* From baf8fb7e0e5ec54ea0839f0c534f2cdcd79bea9c Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 20 Nov 2023 13:24:54 +0800 Subject: [PATCH 061/234] bcache: avoid oversize memory allocation by small stripe_size Arraies bcache->stripe_sectors_dirty and bcache->full_dirty_stripes are used for dirty data writeback, their sizes are decided by backing device capacity and stripe size. Larger backing device capacity or smaller stripe size make these two arraies occupies more dynamic memory space. Currently bcache->stripe_size is directly inherited from queue->limits.io_opt of underlying storage device. For normal hard drives, its limits.io_opt is 0, and bcache sets the corresponding stripe_size to 1TB (1<<31 sectors), it works fine 10+ years. But for devices do declare value for queue->limits.io_opt, small stripe_size (comparing to 1TB) becomes an issue for oversize memory allocations of bcache->stripe_sectors_dirty and bcache->full_dirty_stripes, while the capacity of hard drives gets much larger in recent decade. For example a raid5 array assembled by three 20TB hardrives, the raid device capacity is 40TB with typical 512KB limits.io_opt. After the math calculation in bcache code, these two arraies will occupy 400MB dynamic memory. Even worse Andrea Tomassetti reports that a 4KB limits.io_opt is declared on a new 2TB hard drive, then these two arraies request 2GB and 512MB dynamic memory from kzalloc(). The result is that bcache device always fails to initialize on his system. To avoid the oversize memory allocation, bcache->stripe_size should not directly inherited by queue->limits.io_opt from the underlying device. This patch defines BCH_MIN_STRIPE_SZ (4MB) as minimal bcache stripe size and set bcache device's stripe size against the declared limits.io_opt value from the underlying storage device, - If the declared limits.io_opt > BCH_MIN_STRIPE_SZ, bcache device will set its stripe size directly by this limits.io_opt value. - If the declared limits.io_opt < BCH_MIN_STRIPE_SZ, bcache device will set its stripe size by a value multiplying limits.io_opt and euqal or large than BCH_MIN_STRIPE_SZ. Then the minimal stripe size of a bcache device will always be >= 4MB. For a 40TB raid5 device with 512KB limits.io_opt, memory occupied by bcache->stripe_sectors_dirty and bcache->full_dirty_stripes will be 50MB in total. For a 2TB hard drive with 4KB limits.io_opt, memory occupied by these two arraies will be 2.5MB in total. Such mount of memory allocated for bcache->stripe_sectors_dirty and bcache->full_dirty_stripes is reasonable for most of storage devices. Reported-by: Andrea Tomassetti Signed-off-by: Coly Li Reviewed-by: Eric Wheeler Link: https://lore.kernel.org/r/20231120052503.6122-2-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/bcache.h | 1 + drivers/md/bcache/super.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 05be59ae21b2..6ae2329052c9 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -265,6 +265,7 @@ struct bcache_device { #define BCACHE_DEV_WB_RUNNING 3 #define BCACHE_DEV_RATE_DW_RUNNING 4 int nr_stripes; +#define BCH_MIN_STRIPE_SZ ((4 << 20) >> SECTOR_SHIFT) unsigned int stripe_size; atomic_t *stripe_sectors_dirty; unsigned long *full_dirty_stripes; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 8bd899766372..c7ecc7058d77 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -905,6 +905,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, if (!d->stripe_size) d->stripe_size = 1 << 31; + else if (d->stripe_size < BCH_MIN_STRIPE_SZ) + d->stripe_size = roundup(BCH_MIN_STRIPE_SZ, d->stripe_size); n = DIV_ROUND_UP_ULL(sectors, d->stripe_size); if (!n || n > max_stripes) { From 777967e7e9f6f5f3e153abffb562bffaf4430d26 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 20 Nov 2023 13:24:55 +0800 Subject: [PATCH 062/234] bcache: check return value from btree_node_alloc_replacement() In btree_gc_rewrite_node(), pointer 'n' is not checked after it returns from btree_gc_rewrite_node(). There is potential possibility that 'n' is a non NULL ERR_PTR(), referencing such error code is not permitted in following code. Therefore a return value checking is necessary after 'n' is back from btree_node_alloc_replacement(). Signed-off-by: Coly Li Reported-by: Dan Carpenter Cc: Link: https://lore.kernel.org/r/20231120052503.6122-3-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/btree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index ae5cbb55861f..de8d552201dc 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1532,6 +1532,8 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op, return 0; n = btree_node_alloc_replacement(replace, NULL); + if (IS_ERR(n)) + return 0; /* recheck reserve after allocating replacement node */ if (btree_check_reserve(b, NULL)) { From be93825f0e6428c2d3f03a6e4d447dc48d33d7ff Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 20 Nov 2023 13:24:56 +0800 Subject: [PATCH 063/234] bcache: remove redundant assignment to variable cur_idx Variable cur_idx is being initialized with a value that is never read, it is being re-assigned later in a while-loop. Remove the redundant assignment. Cleans up clang scan build warning: drivers/md/bcache/writeback.c:916:2: warning: Value stored to 'cur_idx' is never read [deadcode.DeadStores] Signed-off-by: Colin Ian King Reviewed-by: Coly Li Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20231120052503.6122-4-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 24c049067f61..c3e872e0a6f2 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -913,7 +913,7 @@ static int bch_dirty_init_thread(void *arg) int cur_idx, prev_idx, skip_nr; k = p = NULL; - cur_idx = prev_idx = 0; + prev_idx = 0; bch_btree_iter_init(&c->root->keys, &iter, NULL); k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); From 2c7f497ac274a14330208b18f6f734000868ebf9 Mon Sep 17 00:00:00 2001 From: Rand Deeb Date: Mon, 20 Nov 2023 13:24:57 +0800 Subject: [PATCH 064/234] bcache: prevent potential division by zero error In SHOW(), the variable 'n' is of type 'size_t.' While there is a conditional check to verify that 'n' is not equal to zero before executing the 'do_div' macro, concerns arise regarding potential division by zero error in 64-bit environments. The concern arises when 'n' is 64 bits in size, greater than zero, and the lower 32 bits of it are zeros. In such cases, the conditional check passes because 'n' is non-zero, but the 'do_div' macro casts 'n' to 'uint32_t,' effectively truncating it to its lower 32 bits. Consequently, the 'n' value becomes zero. To fix this potential division by zero error and ensure precise division handling, this commit replaces the 'do_div' macro with div64_u64(). div64_u64() is designed to work with 64-bit operands, guaranteeing that division is performed correctly. This change enhances the robustness of the code, ensuring that division operations yield accurate results in all scenarios, eliminating the possibility of division by zero, and improving compatibility across different 64-bit environments. Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Rand Deeb Cc: Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20231120052503.6122-5-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 45d8af755de6..a438efb66069 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -1104,7 +1104,7 @@ SHOW(__bch_cache) sum += INITIAL_PRIO - cached[i]; if (n) - do_div(sum, n); + sum = div64_u64(sum, n); for (i = 0; i < ARRAY_SIZE(q); i++) q[i] = INITIAL_PRIO - cached[n * (i + 1) / From 7cc47e64d3d69786a2711a4767e26b26ba63d7ed Mon Sep 17 00:00:00 2001 From: Mingzhe Zou Date: Mon, 20 Nov 2023 13:24:58 +0800 Subject: [PATCH 065/234] bcache: fixup init dirty data errors We found that after long run, the dirty_data of the bcache device will have errors. This error cannot be eliminated unless re-register. We also found that reattach after detach, this error can accumulate. In bch_sectors_dirty_init(), all inode <= d->id keys will be recounted again. This is wrong, we only need to count the keys of the current device. Fixes: b144e45fc576 ("bcache: make bch_sectors_dirty_init() to be multithreaded") Signed-off-by: Mingzhe Zou Cc: Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20231120052503.6122-6-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/writeback.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index c3e872e0a6f2..77fb72ac6b81 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -991,8 +991,11 @@ void bch_sectors_dirty_init(struct bcache_device *d) op.count = 0; for_each_key_filter(&c->root->keys, - k, &iter, bch_ptr_invalid) + k, &iter, bch_ptr_invalid) { + if (KEY_INODE(k) != op.inode) + continue; sectors_dirty_init_fn(&op.op, c->root, k); + } rw_unlock(0, c->root); return; From e34820f984512b433ee1fc291417e60c47d56727 Mon Sep 17 00:00:00 2001 From: Mingzhe Zou Date: Mon, 20 Nov 2023 13:24:59 +0800 Subject: [PATCH 066/234] bcache: fixup lock c->root error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We had a problem with io hung because it was waiting for c->root to release the lock. crash> cache_set.root -l cache_set.list ffffa03fde4c0050 root = 0xffff802ef454c800 crash> btree -o 0xffff802ef454c800 | grep rw_semaphore [ffff802ef454c858] struct rw_semaphore lock; crash> struct rw_semaphore ffff802ef454c858 struct rw_semaphore { count = { counter = -4294967297 }, wait_list = { next = 0xffff00006786fc28, prev = 0xffff00005d0efac8 }, wait_lock = { raw_lock = { { val = { counter = 0 }, { locked = 0 '\000', pending = 0 '\000' }, { locked_pending = 0, tail = 0 } } } }, osq = { tail = { counter = 0 } }, owner = 0xffffa03fdc586603 } The "counter = -4294967297" means that lock count is -1 and a write lock is being attempted. Then, we found that there is a btree with a counter of 1 in btree_cache_freeable. crash> cache_set -l cache_set.list ffffa03fde4c0050 -o|grep btree_cache [ffffa03fde4c1140] struct list_head btree_cache; [ffffa03fde4c1150] struct list_head btree_cache_freeable; [ffffa03fde4c1160] struct list_head btree_cache_freed; [ffffa03fde4c1170] unsigned int btree_cache_used; [ffffa03fde4c1178] wait_queue_head_t btree_cache_wait; [ffffa03fde4c1190] struct task_struct *btree_cache_alloc_lock; crash> list -H ffffa03fde4c1140|wc -l 973 crash> list -H ffffa03fde4c1150|wc -l 1123 crash> cache_set.btree_cache_used -l cache_set.list ffffa03fde4c0050 btree_cache_used = 2097 crash> list -s btree -l btree.list -H ffffa03fde4c1140|grep -E -A2 "^ lock = {" > btree_cache.txt crash> list -s btree -l btree.list -H ffffa03fde4c1150|grep -E -A2 "^ lock = {" > btree_cache_freeable.txt [root@node-3 127.0.0.1-2023-08-04-16:40:28]# pwd /var/crash/127.0.0.1-2023-08-04-16:40:28 [root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache.txt|grep counter|grep -v "counter = 0" [root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache_freeable.txt|grep counter|grep -v "counter = 0" counter = 1 We found that this is a bug in bch_sectors_dirty_init() when locking c->root: (1). Thread X has locked c->root(A) write. (2). Thread Y failed to lock c->root(A), waiting for the lock(c->root A). (3). Thread X bch_btree_set_root() changes c->root from A to B. (4). Thread X releases the lock(c->root A). (5). Thread Y successfully locks c->root(A). (6). Thread Y releases the lock(c->root B). down_write locked ---(1)----------------------┐ | | | down_read waiting ---(2)----┐ | | | ┌-------------┐ ┌-------------┐ bch_btree_set_root ===(3)========>> | c->root A | | c->root B | | | └-------------┘ └-------------┘ up_write ---(4)---------------------┘ | | | | | down_read locked ---(5)-----------┘ | | | up_read ---(6)-----------------------------┘ Since c->root may change, the correct steps to lock c->root should be the same as bch_root_usage(), compare after locking. static unsigned int bch_root_usage(struct cache_set *c) { unsigned int bytes = 0; struct bkey *k; struct btree *b; struct btree_iter iter; goto lock_root; do { rw_unlock(false, b); lock_root: b = c->root; rw_lock(false, b, b->level); } while (b != c->root); for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad) bytes += bkey_bytes(k); rw_unlock(false, b); return (bytes * 100) / btree_bytes(c); } Fixes: b144e45fc576 ("bcache: make bch_sectors_dirty_init() to be multithreaded") Signed-off-by: Mingzhe Zou Cc: Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20231120052503.6122-7-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/writeback.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 77fb72ac6b81..a1d760916246 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -977,14 +977,22 @@ static int bch_btre_dirty_init_thread_nr(void) void bch_sectors_dirty_init(struct bcache_device *d) { int i; + struct btree *b = NULL; struct bkey *k = NULL; struct btree_iter iter; struct sectors_dirty_init op; struct cache_set *c = d->c; struct bch_dirty_init_state state; +retry_lock: + b = c->root; + rw_lock(0, b, b->level); + if (b != c->root) { + rw_unlock(0, b); + goto retry_lock; + } + /* Just count root keys if no leaf node */ - rw_lock(0, c->root, c->root->level); if (c->root->level == 0) { bch_btree_op_init(&op.op, -1); op.inode = d->id; @@ -997,7 +1005,7 @@ void bch_sectors_dirty_init(struct bcache_device *d) sectors_dirty_init_fn(&op.op, c->root, k); } - rw_unlock(0, c->root); + rw_unlock(0, b); return; } @@ -1033,7 +1041,7 @@ void bch_sectors_dirty_init(struct bcache_device *d) out: /* Must wait for all threads to stop. */ wait_event(state.wait, atomic_read(&state.started) == 0); - rw_unlock(0, c->root); + rw_unlock(0, b); } void bch_cached_dev_writeback_init(struct cached_dev *dc) From 2faac25d7958c4761bb8cec54adb79f806783ad6 Mon Sep 17 00:00:00 2001 From: Mingzhe Zou Date: Mon, 20 Nov 2023 13:25:00 +0800 Subject: [PATCH 067/234] bcache: fixup multi-threaded bch_sectors_dirty_init() wake-up race We get a kernel crash about "unable to handle kernel paging request": ```dmesg [368033.032005] BUG: unable to handle kernel paging request at ffffffffad9ae4b5 [368033.032007] PGD fc3a0d067 P4D fc3a0d067 PUD fc3a0e063 PMD 8000000fc38000e1 [368033.032012] Oops: 0003 [#1] SMP PTI [368033.032015] CPU: 23 PID: 55090 Comm: bch_dirtcnt[0] Kdump: loaded Tainted: G OE --------- - - 4.18.0-147.5.1.es8_24.x86_64 #1 [368033.032017] Hardware name: Tsinghua Tongfang THTF Chaoqiang Server/072T6D, BIOS 2.4.3 01/17/2017 [368033.032027] RIP: 0010:native_queued_spin_lock_slowpath+0x183/0x1d0 [368033.032029] Code: 8b 02 48 85 c0 74 f6 48 89 c1 eb d0 c1 e9 12 83 e0 03 83 e9 01 48 c1 e0 05 48 63 c9 48 05 c0 3d 02 00 48 03 04 cd 60 68 93 ad <48> 89 10 8b 42 08 85 c0 75 09 f3 90 8b 42 08 85 c0 74 f7 48 8b 02 [368033.032031] RSP: 0018:ffffbb48852abe00 EFLAGS: 00010082 [368033.032032] RAX: ffffffffad9ae4b5 RBX: 0000000000000246 RCX: 0000000000003bf3 [368033.032033] RDX: ffff97b0ff8e3dc0 RSI: 0000000000600000 RDI: ffffbb4884743c68 [368033.032034] RBP: 0000000000000001 R08: 0000000000000000 R09: 000007ffffffffff [368033.032035] R10: ffffbb486bb01000 R11: 0000000000000001 R12: ffffffffc068da70 [368033.032036] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000000 [368033.032038] FS: 0000000000000000(0000) GS:ffff97b0ff8c0000(0000) knlGS:0000000000000000 [368033.032039] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [368033.032040] CR2: ffffffffad9ae4b5 CR3: 0000000fc3a0a002 CR4: 00000000003626e0 [368033.032042] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [368033.032043] bcache: bch_cached_dev_attach() Caching rbd479 as bcache462 on set 8cff3c36-4a76-4242-afaa-7630206bc70b [368033.032045] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [368033.032046] Call Trace: [368033.032054] _raw_spin_lock_irqsave+0x32/0x40 [368033.032061] __wake_up_common_lock+0x63/0xc0 [368033.032073] ? bch_ptr_invalid+0x10/0x10 [bcache] [368033.033502] bch_dirty_init_thread+0x14c/0x160 [bcache] [368033.033511] ? read_dirty_submit+0x60/0x60 [bcache] [368033.033516] kthread+0x112/0x130 [368033.033520] ? kthread_flush_work_fn+0x10/0x10 [368033.034505] ret_from_fork+0x35/0x40 ``` The crash occurred when call wake_up(&state->wait), and then we want to look at the value in the state. However, bch_sectors_dirty_init() is not found in the stack of any task. Since state is allocated on the stack, we guess that bch_sectors_dirty_init() has exited, causing bch_dirty_init_thread() to be unable to handle kernel paging request. In order to verify this idea, we added some printing information during wake_up(&state->wait). We find that "wake up" is printed twice, however we only expect the last thread to wake up once. ```dmesg [ 994.641004] alcache: bch_dirty_init_thread() wake up [ 994.641018] alcache: bch_dirty_init_thread() wake up [ 994.641523] alcache: bch_sectors_dirty_init() init exit ``` There is a race. If bch_sectors_dirty_init() exits after the first wake up, the second wake up will trigger this bug("unable to handle kernel paging request"). Proceed as follows: bch_sectors_dirty_init kthread_run ==============> bch_dirty_init_thread(bch_dirtcnt[0]) ... ... atomic_inc(&state.started) ... ... ... atomic_read(&state.enough) ... ... atomic_set(&state->enough, 1) kthread_run ======================================================> bch_dirty_init_thread(bch_dirtcnt[1]) ... atomic_dec_and_test(&state->started) ... atomic_inc(&state.started) ... ... ... wake_up(&state->wait) ... atomic_read(&state.enough) atomic_dec_and_test(&state->started) ... ... wait_event(state.wait, atomic_read(&state.started) == 0) ... return ... wake_up(&state->wait) We believe it is very common to wake up twice if there is no dirty, but crash is an extremely low probability event. It's hard for us to reproduce this issue. We attached and detached continuously for a week, with a total of more than one million attaches and only one crash. Putting atomic_inc(&state.started) before kthread_run() can avoid waking up twice. Fixes: b144e45fc576 ("bcache: make bch_sectors_dirty_init() to be multithreaded") Signed-off-by: Mingzhe Zou Cc: Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20231120052503.6122-8-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/writeback.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index a1d760916246..3accfdaee6b1 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -1025,17 +1025,18 @@ retry_lock: if (atomic_read(&state.enough)) break; + atomic_inc(&state.started); state.infos[i].state = &state; state.infos[i].thread = kthread_run(bch_dirty_init_thread, &state.infos[i], "bch_dirtcnt[%d]", i); if (IS_ERR(state.infos[i].thread)) { pr_err("fails to run thread bch_dirty_init[%d]\n", i); + atomic_dec(&state.started); for (--i; i >= 0; i--) kthread_stop(state.infos[i].thread); goto out; } - atomic_inc(&state.started); } out: From f72f4312d4388376fc8a1f6cf37cb21a0d41758b Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 20 Nov 2023 13:25:01 +0800 Subject: [PATCH 068/234] bcache: replace a mistaken IS_ERR() by IS_ERR_OR_NULL() in btree_gc_coalesce() Commit 028ddcac477b ("bcache: Remove unnecessary NULL point check in node allocations") do the following change inside btree_gc_coalesce(), 31 @@ -1340,7 +1340,7 @@ static int btree_gc_coalesce( 32 memset(new_nodes, 0, sizeof(new_nodes)); 33 closure_init_stack(&cl); 34 35 - while (nodes < GC_MERGE_NODES && !IS_ERR_OR_NULL(r[nodes].b)) 36 + while (nodes < GC_MERGE_NODES && !IS_ERR(r[nodes].b)) 37 keys += r[nodes++].keys; 38 39 blocks = btree_default_blocks(b->c) * 2 / 3; At line 35 the original r[nodes].b is not always allocatored from __bch_btree_node_alloc(), and possibly initialized as NULL pointer by caller of btree_gc_coalesce(). Therefore the change at line 36 is not correct. This patch replaces the mistaken IS_ERR() by IS_ERR_OR_NULL() to avoid potential issue. Fixes: 028ddcac477b ("bcache: Remove unnecessary NULL point check in node allocations") Cc: # 6.5+ Cc: Zheng Wang Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20231120052503.6122-9-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/btree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index de8d552201dc..79f1fa4a0d55 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1368,7 +1368,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, memset(new_nodes, 0, sizeof(new_nodes)); closure_init_stack(&cl); - while (nodes < GC_MERGE_NODES && !IS_ERR(r[nodes].b)) + while (nodes < GC_MERGE_NODES && !IS_ERR_OR_NULL(r[nodes].b)) keys += r[nodes++].keys; blocks = btree_default_blocks(b->c) * 2 / 3; From 31f5b956a197d4ec25c8a07cb3a2ab69d0c0b82f Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 20 Nov 2023 13:25:02 +0800 Subject: [PATCH 069/234] bcache: add code comments for bch_btree_node_get() and __bch_btree_node_alloc() This patch adds code comments to bch_btree_node_get() and __bch_btree_node_alloc() that NULL pointer will not be returned and it is unnecessary to check NULL pointer by the callers of these routines. Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20231120052503.6122-10-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/btree.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 79f1fa4a0d55..de3019972b35 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1000,6 +1000,9 @@ err: * * The btree node will have either a read or a write lock held, depending on * level and op->lock. + * + * Note: Only error code or btree pointer will be returned, it is unncessary + * for callers to check NULL pointer. */ struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op, struct bkey *k, int level, bool write, @@ -1111,6 +1114,10 @@ retry: mutex_unlock(&b->c->bucket_lock); } +/* + * Only error code or btree pointer will be returned, it is unncessary for + * callers to check NULL pointer. + */ struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op, int level, bool wait, struct btree *parent) From 3eba5e0b2422aec3c9e79822029599961fdcab97 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 20 Nov 2023 13:25:03 +0800 Subject: [PATCH 070/234] bcache: avoid NULL checking to c->root in run_cache_set() In run_cache_set() after c->root returned from bch_btree_node_get(), it is checked by IS_ERR_OR_NULL(). Indeed it is unncessary to check NULL because bch_btree_node_get() will not return NULL pointer to caller. This patch replaces IS_ERR_OR_NULL() by IS_ERR() for the above reason. Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20231120052503.6122-11-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index c7ecc7058d77..bfe1685dbae5 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2018,7 +2018,7 @@ static int run_cache_set(struct cache_set *c) c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true, NULL); - if (IS_ERR_OR_NULL(c->root)) + if (IS_ERR(c->root)) goto err; list_del_init(&c->root->list); From 9bb69ba4c177dccaa1f5b5cbdf80b67813328348 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 27 Oct 2023 19:36:51 +0100 Subject: [PATCH 071/234] ACPI: processor_idle: use raw_safe_halt() in acpi_idle_play_dead() Xen HVM guests were observed taking triple-faults when attempting to online a previously offlined vCPU. Investigation showed that the fault was coming from a failing call to lockdep_assert_irqs_disabled(), in load_current_idt() which was too early in the CPU bringup to actually catch the exception and report the failure cleanly. This was a false positive, caused by acpi_idle_play_dead() setting the per-cpu hardirqs_enabled flag by calling safe_halt(). Switch it to use raw_safe_halt() instead, which doesn't do so. Signed-off-by: David Woodhouse Acked-by: Peter Zijlstra (Intel) Cc: 6.6+ # 6.6+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_idle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 3a34a8c425fe..55437f5e0c3a 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -592,7 +592,7 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index) while (1) { if (cx->entry_method == ACPI_CSTATE_HALT) - safe_halt(); + raw_safe_halt(); else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) { io_idle(cx->address); } else From 37ba91a82e3b9de35f64348c62b5ec7d74e3a41c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 12 Nov 2023 21:36:26 +0100 Subject: [PATCH 072/234] ACPI: PM: Add acpi_device_fix_up_power_children() function In some cases it is necessary to fix-up the power-state of an ACPI device's children without touching the ACPI device itself add a new acpi_device_fix_up_power_children() function for this. Signed-off-by: Hans de Goede Cc: 6.6+ # 6.6+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/device_pm.c | 13 +++++++++++++ include/acpi/acpi_bus.h | 1 + 2 files changed, 14 insertions(+) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index f007116a8427..3b4d048c4941 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -397,6 +397,19 @@ void acpi_device_fix_up_power_extended(struct acpi_device *adev) } EXPORT_SYMBOL_GPL(acpi_device_fix_up_power_extended); +/** + * acpi_device_fix_up_power_children - Force a device's children into D0. + * @adev: Parent device object whose children's power state is to be fixed up. + * + * Call acpi_device_fix_up_power() for @adev's children so long as they + * are reported as present and enabled. + */ +void acpi_device_fix_up_power_children(struct acpi_device *adev) +{ + acpi_dev_for_each_child(adev, fix_up_power_if_applicable, NULL); +} +EXPORT_SYMBOL_GPL(acpi_device_fix_up_power_children); + int acpi_device_update_power(struct acpi_device *device, int *state_p) { int state; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index afeed6e72049..1216d72c650f 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -542,6 +542,7 @@ int acpi_device_set_power(struct acpi_device *device, int state); int acpi_bus_init_power(struct acpi_device *device); int acpi_device_fix_up_power(struct acpi_device *device); void acpi_device_fix_up_power_extended(struct acpi_device *adev); +void acpi_device_fix_up_power_children(struct acpi_device *adev); int acpi_bus_update_power(acpi_handle handle, int *state_p); int acpi_device_update_power(struct acpi_device *device, int *state_p); bool acpi_bus_power_manageable(acpi_handle handle); From c93695494606326d7fd72b46a2a657139ccb0dec Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 12 Nov 2023 21:36:27 +0100 Subject: [PATCH 073/234] ACPI: video: Use acpi_device_fix_up_power_children() Commit 89c290ea7589 ("ACPI: video: Put ACPI video and its child devices into D0 on boot") introduced calling acpi_device_fix_up_power_extended() on the video card for which the ACPI video bus is the companion device. This unnecessarily touches the power-state of the GPU itself, while the issue it tries to address only requires calling _PS0 on the child devices. Touching the power-state of the GPU itself is causing suspend / resume issues on e.g. a Lenovo ThinkPad W530. Instead use acpi_device_fix_up_power_children(), which only touches the child devices, to fix this. Fixes: 89c290ea7589 ("ACPI: video: Put ACPI video and its child devices into D0 on boot") Reported-by: Owen T. Heisler Closes: https://lore.kernel.org/regressions/9f36fb06-64c4-4264-aaeb-4e1289e764c4@owenh.net/ Closes: https://gitlab.freedesktop.org/drm/nouveau/-/issues/273 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218124 Tested-by: Kai-Heng Feng Tested-by: Owen T. Heisler Signed-off-by: Hans de Goede Cc: 6.6+ # 6.6+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_video.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 0b7a01f38b65..d321ca7160d9 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -2031,7 +2031,7 @@ static int acpi_video_bus_add(struct acpi_device *device) * HP ZBook Fury 16 G10 requires ACPI video's child devices have _PS0 * evaluated to have functional panel brightness control. */ - acpi_device_fix_up_power_extended(device); + acpi_device_fix_up_power_children(device); pr_info("%s [%s] (multi-head: %s rom: %s post: %s)\n", ACPI_VIDEO_DEVICE_NAME, acpi_device_bid(device), From bd911485294a6f0596e4592ed442438015cffc8a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 15 Nov 2023 19:02:22 +0100 Subject: [PATCH 074/234] ACPI: resource: Skip IRQ override on ASUS ExpertBook B1402CVA Like various other ASUS ExpertBook-s, the ASUS ExpertBook B1402CVA has an ACPI DSDT table that describes IRQ 1 as ActiveLow while the kernel overrides it to EdgeHigh. This prevents the keyboard from working. To fix this issue, add this laptop to the skip_override_table so that the kernel does not override IRQ 1. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218114 Cc: All applicable Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/resource.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 15a3bdbd0755..9bd9f79cd409 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -447,6 +447,13 @@ static const struct dmi_system_id irq1_level_low_skip_override[] = { DMI_MATCH(DMI_BOARD_NAME, "B1402CBA"), }, }, + { + /* Asus ExpertBook B1402CVA */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_BOARD_NAME, "B1402CVA"), + }, + }, { /* Asus ExpertBook B1502CBA */ .matches = { From b85e2dab33ce467e8dcf1cb6c0c587132ff17f56 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 15 Nov 2023 11:47:51 -0500 Subject: [PATCH 075/234] PM: tools: Fix sleepgraph syntax error The sleepgraph tool currently fails: File "/usr/bin/sleepgraph", line 4155 or re.match('psci: CPU(?P[0-9]*) killed.*', msg)): ^ SyntaxError: unmatched ')' Fixes: 34ea427e01ea ("PM: tools: sleepgraph: Recognize "CPU killed" messages") Signed-off-by: David Woodhouse Reviewed-by: Wolfram Sang Signed-off-by: Rafael J. Wysocki --- tools/power/pm-graph/sleepgraph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index 4a356a706785..40ad221e8881 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -4151,7 +4151,7 @@ def parseKernelLog(data): elif(re.match('Enabling non-boot CPUs .*', msg)): # start of first cpu resume cpu_start = ktime - elif(re.match('smpboot: CPU (?P[0-9]*) is now offline', msg)) \ + elif(re.match('smpboot: CPU (?P[0-9]*) is now offline', msg) \ or re.match('psci: CPU(?P[0-9]*) killed.*', msg)): # end of a cpu suspend, start of the next m = re.match('smpboot: CPU (?P[0-9]*) is now offline', msg) From 1b59860540a4018e8071dc18d4893ec389506b7d Mon Sep 17 00:00:00 2001 From: Li Nan Date: Fri, 17 Nov 2023 00:23:14 +0800 Subject: [PATCH 076/234] nbd: fold nbd config initialization into nbd_alloc_config() There are no functional changes, make the code cleaner and prepare to fix null-ptr-dereference while accessing 'nbd->config'. Signed-off-by: Li Nan Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20231116162316.1740402-2-linan666@huaweicloud.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 855fdf5c3b4e..02f844832d91 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1530,17 +1530,20 @@ static int nbd_ioctl(struct block_device *bdev, blk_mode_t mode, return error; } -static struct nbd_config *nbd_alloc_config(void) +static int nbd_alloc_and_init_config(struct nbd_device *nbd) { struct nbd_config *config; + if (WARN_ON(nbd->config)) + return -EINVAL; + if (!try_module_get(THIS_MODULE)) - return ERR_PTR(-ENODEV); + return -ENODEV; config = kzalloc(sizeof(struct nbd_config), GFP_NOFS); if (!config) { module_put(THIS_MODULE); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } atomic_set(&config->recv_threads, 0); @@ -1548,7 +1551,10 @@ static struct nbd_config *nbd_alloc_config(void) init_waitqueue_head(&config->conn_wait); config->blksize_bits = NBD_DEF_BLKSIZE_BITS; atomic_set(&config->live_connections, 0); - return config; + nbd->config = config; + refcount_set(&nbd->config_refs, 1); + + return 0; } static int nbd_open(struct gendisk *disk, blk_mode_t mode) @@ -1567,21 +1573,17 @@ static int nbd_open(struct gendisk *disk, blk_mode_t mode) goto out; } if (!refcount_inc_not_zero(&nbd->config_refs)) { - struct nbd_config *config; - mutex_lock(&nbd->config_lock); if (refcount_inc_not_zero(&nbd->config_refs)) { mutex_unlock(&nbd->config_lock); goto out; } - config = nbd_alloc_config(); - if (IS_ERR(config)) { - ret = PTR_ERR(config); + ret = nbd_alloc_and_init_config(nbd); + if (ret) { mutex_unlock(&nbd->config_lock); goto out; } - nbd->config = config; - refcount_set(&nbd->config_refs, 1); + refcount_inc(&nbd->refs); mutex_unlock(&nbd->config_lock); if (max_part) @@ -1990,22 +1992,17 @@ again: pr_err("nbd%d already in use\n", index); return -EBUSY; } - if (WARN_ON(nbd->config)) { - mutex_unlock(&nbd->config_lock); - nbd_put(nbd); - return -EINVAL; - } - config = nbd_alloc_config(); - if (IS_ERR(config)) { + + ret = nbd_alloc_and_init_config(nbd); + if (ret) { mutex_unlock(&nbd->config_lock); nbd_put(nbd); pr_err("couldn't allocate config\n"); - return PTR_ERR(config); + return ret; } - nbd->config = config; - refcount_set(&nbd->config_refs, 1); - set_bit(NBD_RT_BOUND, &config->runtime_flags); + config = nbd->config; + set_bit(NBD_RT_BOUND, &config->runtime_flags); ret = nbd_genl_size_set(info, nbd); if (ret) goto out; From 3123ac77923341774ca3ad1196ad20bb0732bf70 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Fri, 17 Nov 2023 00:23:15 +0800 Subject: [PATCH 077/234] nbd: factor out a helper to get nbd_config without holding 'config_lock' There are no functional changes, just to make code cleaner and prepare to fix null-ptr-dereference while accessing 'nbd->config'. Signed-off-by: Li Nan Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20231116162316.1740402-3-linan666@huaweicloud.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 02f844832d91..daaf8805e876 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -395,6 +395,14 @@ static u32 req_to_nbd_cmd_type(struct request *req) } } +static struct nbd_config *nbd_get_config_unlocked(struct nbd_device *nbd) +{ + if (refcount_inc_not_zero(&nbd->config_refs)) + return nbd->config; + + return NULL; +} + static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); @@ -409,13 +417,13 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req) return BLK_EH_DONE; } - if (!refcount_inc_not_zero(&nbd->config_refs)) { + config = nbd_get_config_unlocked(nbd); + if (!config) { cmd->status = BLK_STS_TIMEOUT; __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags); mutex_unlock(&cmd->lock); goto done; } - config = nbd->config; if (config->num_connections > 1 || (config->num_connections == 1 && nbd->tag_set.timeout)) { @@ -977,12 +985,12 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) struct nbd_sock *nsock; int ret; - if (!refcount_inc_not_zero(&nbd->config_refs)) { + config = nbd_get_config_unlocked(nbd); + if (!config) { dev_err_ratelimited(disk_to_dev(nbd->disk), "Socks array is empty\n"); return -EINVAL; } - config = nbd->config; if (index >= config->num_connections) { dev_err_ratelimited(disk_to_dev(nbd->disk), @@ -1560,6 +1568,7 @@ static int nbd_alloc_and_init_config(struct nbd_device *nbd) static int nbd_open(struct gendisk *disk, blk_mode_t mode) { struct nbd_device *nbd; + struct nbd_config *config; int ret = 0; mutex_lock(&nbd_index_mutex); @@ -1572,7 +1581,9 @@ static int nbd_open(struct gendisk *disk, blk_mode_t mode) ret = -ENXIO; goto out; } - if (!refcount_inc_not_zero(&nbd->config_refs)) { + + config = nbd_get_config_unlocked(nbd); + if (!config) { mutex_lock(&nbd->config_lock); if (refcount_inc_not_zero(&nbd->config_refs)) { mutex_unlock(&nbd->config_lock); @@ -1588,7 +1599,7 @@ static int nbd_open(struct gendisk *disk, blk_mode_t mode) mutex_unlock(&nbd->config_lock); if (max_part) set_bit(GD_NEED_PART_SCAN, &disk->state); - } else if (nbd_disconnected(nbd->config)) { + } else if (nbd_disconnected(config)) { if (max_part) set_bit(GD_NEED_PART_SCAN, &disk->state); } @@ -2205,7 +2216,8 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) } mutex_unlock(&nbd_index_mutex); - if (!refcount_inc_not_zero(&nbd->config_refs)) { + config = nbd_get_config_unlocked(nbd); + if (!config) { dev_err(nbd_to_dev(nbd), "not configured, cannot reconfigure\n"); nbd_put(nbd); @@ -2213,7 +2225,6 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) } mutex_lock(&nbd->config_lock); - config = nbd->config; if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) || !nbd->pid) { dev_err(nbd_to_dev(nbd), From c2da049f419417808466c529999170f5c3ef7d3d Mon Sep 17 00:00:00 2001 From: Li Nan Date: Fri, 17 Nov 2023 00:23:16 +0800 Subject: [PATCH 078/234] nbd: fix null-ptr-dereference while accessing 'nbd->config' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Memory reordering may occur in nbd_genl_connect(), causing config_refs to be set to 1 while nbd->config is still empty. Opening nbd at this time will cause null-ptr-dereference. T1 T2 nbd_open nbd_get_config_unlocked nbd_genl_connect nbd_alloc_and_init_config //memory reordered refcount_set(&nbd->config_refs, 1) // 2 nbd->config ->null point nbd->config = config // 1 Fix it by adding smp barrier to guarantee the execution sequence. Signed-off-by: Li Nan Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20231116162316.1740402-4-linan666@huaweicloud.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index daaf8805e876..3f03cb3dc33c 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -397,8 +397,16 @@ static u32 req_to_nbd_cmd_type(struct request *req) static struct nbd_config *nbd_get_config_unlocked(struct nbd_device *nbd) { - if (refcount_inc_not_zero(&nbd->config_refs)) + if (refcount_inc_not_zero(&nbd->config_refs)) { + /* + * Add smp_mb__after_atomic to ensure that reading nbd->config_refs + * and reading nbd->config is ordered. The pair is the barrier in + * nbd_alloc_and_init_config(), avoid nbd->config_refs is set + * before nbd->config. + */ + smp_mb__after_atomic(); return nbd->config; + } return NULL; } @@ -1559,7 +1567,15 @@ static int nbd_alloc_and_init_config(struct nbd_device *nbd) init_waitqueue_head(&config->conn_wait); config->blksize_bits = NBD_DEF_BLKSIZE_BITS; atomic_set(&config->live_connections, 0); + nbd->config = config; + /* + * Order refcount_set(&nbd->config_refs, 1) and nbd->config assignment, + * its pair is the barrier in nbd_get_config_unlocked(). + * So nbd_get_config_unlocked() won't see nbd->config as null after + * refcount_inc_not_zero() succeed. + */ + smp_mb__before_atomic(); refcount_set(&nbd->config_refs, 1); return 0; From c96b8175522a2c52e297ee0a49827a668f95e1e8 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 20 Nov 2023 16:06:11 +0900 Subject: [PATCH 079/234] block: Remove blk_set_runtime_active() The function blk_set_runtime_active() is called only from blk_post_runtime_resume(), so there is no need for that function to be exported. Open-code this function directly in blk_post_runtime_resume() and remove it. Signed-off-by: Damien Le Moal Reviewed-by: Bart Van Assche Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20231120070611.33951-1-dlemoal@kernel.org Signed-off-by: Jens Axboe --- block/blk-pm.c | 33 +++++---------------------------- include/linux/blk-pm.h | 1 - 2 files changed, 5 insertions(+), 29 deletions(-) diff --git a/block/blk-pm.c b/block/blk-pm.c index 6b72b2e03fc8..42e842074715 100644 --- a/block/blk-pm.c +++ b/block/blk-pm.c @@ -163,38 +163,15 @@ EXPORT_SYMBOL(blk_pre_runtime_resume); * @q: the queue of the device * * Description: - * For historical reasons, this routine merely calls blk_set_runtime_active() - * to do the real work of restarting the queue. It does this regardless of - * whether the device's runtime-resume succeeded; even if it failed the + * Restart the queue of a runtime suspended device. It does this regardless + * of whether the device's runtime-resume succeeded; even if it failed the * driver or error handler will need to communicate with the device. * * This function should be called near the end of the device's - * runtime_resume callback. + * runtime_resume callback to correct queue runtime PM status and re-enable + * peeking requests from the queue. */ void blk_post_runtime_resume(struct request_queue *q) -{ - blk_set_runtime_active(q); -} -EXPORT_SYMBOL(blk_post_runtime_resume); - -/** - * blk_set_runtime_active - Force runtime status of the queue to be active - * @q: the queue of the device - * - * If the device is left runtime suspended during system suspend the resume - * hook typically resumes the device and corrects runtime status - * accordingly. However, that does not affect the queue runtime PM status - * which is still "suspended". This prevents processing requests from the - * queue. - * - * This function can be used in driver's resume hook to correct queue - * runtime PM status and re-enable peeking requests from the queue. It - * should be called before first request is added to the queue. - * - * This function is also called by blk_post_runtime_resume() for - * runtime resumes. It does everything necessary to restart the queue. - */ -void blk_set_runtime_active(struct request_queue *q) { int old_status; @@ -211,4 +188,4 @@ void blk_set_runtime_active(struct request_queue *q) if (old_status != RPM_ACTIVE) blk_clear_pm_only(q); } -EXPORT_SYMBOL(blk_set_runtime_active); +EXPORT_SYMBOL(blk_post_runtime_resume); diff --git a/include/linux/blk-pm.h b/include/linux/blk-pm.h index 2580e05a8ab6..004b38a538ff 100644 --- a/include/linux/blk-pm.h +++ b/include/linux/blk-pm.h @@ -15,7 +15,6 @@ extern int blk_pre_runtime_suspend(struct request_queue *q); extern void blk_post_runtime_suspend(struct request_queue *q, int err); extern void blk_pre_runtime_resume(struct request_queue *q); extern void blk_post_runtime_resume(struct request_queue *q); -extern void blk_set_runtime_active(struct request_queue *q); #else static inline void blk_pm_runtime_init(struct request_queue *q, struct device *dev) {} From 616add70bfdc0274a253e84fc78155c27aacde91 Mon Sep 17 00:00:00 2001 From: Mark O'Donovan Date: Wed, 11 Oct 2023 08:45:11 +0000 Subject: [PATCH 080/234] nvme-auth: unlock mutex in one place only Signed-off-by: Mark O'Donovan Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Keith Busch --- drivers/nvme/host/auth.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index 48328e36e93b..0f5ea63d3c8d 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -757,12 +757,11 @@ static void nvme_queue_auth_work(struct work_struct *work) __func__, chap->qid); mutex_lock(&ctrl->dhchap_auth_mutex); ret = nvme_auth_dhchap_setup_host_response(ctrl, chap); + mutex_unlock(&ctrl->dhchap_auth_mutex); if (ret) { - mutex_unlock(&ctrl->dhchap_auth_mutex); chap->error = ret; goto fail2; } - mutex_unlock(&ctrl->dhchap_auth_mutex); /* DH-HMAC-CHAP Step 3: send reply */ dev_dbg(ctrl->device, "%s: qid %d send reply\n", From 38ce1570e2c46e7e9af983aa337edd7e43723aa2 Mon Sep 17 00:00:00 2001 From: Mark O'Donovan Date: Wed, 11 Oct 2023 08:45:12 +0000 Subject: [PATCH 081/234] nvme-auth: set explanation code for failure2 msgs Some error cases were not setting an auth-failure-reason-code-explanation. This means an AUTH_Failure2 message will be sent with an explanation value of 0 which is a reserved value. Signed-off-by: Mark O'Donovan Reviewed-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/auth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index 0f5ea63d3c8d..72c0525c75f5 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -838,6 +838,8 @@ static void nvme_queue_auth_work(struct work_struct *work) } fail2: + if (chap->status == 0) + chap->status = NVME_AUTH_DHCHAP_FAILURE_FAILED; dev_dbg(ctrl->device, "%s: qid %d send failure2, status %x\n", __func__, chap->qid, chap->status); tl = nvme_auth_set_dhchap_failure2_data(ctrl, chap); From 23441536b63677cb2ed9b1637d8ca70315e44bd0 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 14 Nov 2023 14:18:21 +0100 Subject: [PATCH 082/234] nvme-tcp: only evaluate 'tls' option if TLS is selected We only need to evaluate the 'tls' connect option if TLS is enabled; otherwise we might be getting a link error. Fixes: 706add13676d ("nvme: keyring: fix conditional compilation") Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202311140426.0eHrTXBr-lkp@intel.com/ Signed-off-by: Hannes Reinecke Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 89661a9cf850..6ed794815517 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1916,7 +1916,7 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl) int ret; key_serial_t pskid = 0; - if (ctrl->opts->tls) { + if (IS_ENABLED(CONFIG_NVME_TCP_TLS) && ctrl->opts->tls) { if (ctrl->opts->tls_key) pskid = key_serial(ctrl->opts->tls_key); else From cd9aed606088d36a7ffff3e808db4e76b1854285 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 14 Nov 2023 14:27:01 +0100 Subject: [PATCH 083/234] nvme: catch errors from nvme_configure_metadata() nvme_configure_metadata() is issuing I/O, so we might incur an I/O error which will cause the connection to be reset. But in that case any further probing will race with reset and cause UAF errors. So return a status from nvme_configure_metadata() and abort probing if there was an I/O error. Signed-off-by: Hannes Reinecke Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 88b54cdcbd68..fd28e6b6574c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1814,16 +1814,18 @@ set_pi: return ret; } -static void nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) +static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) { struct nvme_ctrl *ctrl = ns->ctrl; + int ret; - if (nvme_init_ms(ns, id)) - return; + ret = nvme_init_ms(ns, id); + if (ret) + return ret; ns->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); if (!ns->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) - return; + return 0; if (ctrl->ops->flags & NVME_F_FABRICS) { /* @@ -1832,7 +1834,7 @@ static void nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) * remap the separate metadata buffer from the block layer. */ if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT))) - return; + return 0; ns->features |= NVME_NS_EXT_LBAS; @@ -1859,6 +1861,7 @@ static void nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) else ns->features |= NVME_NS_METADATA_SUPPORTED; } + return 0; } static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, @@ -2032,7 +2035,11 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, ns->lba_shift = id->lbaf[lbaf].ds; nvme_set_queue_limits(ns->ctrl, ns->queue); - nvme_configure_metadata(ns, id); + ret = nvme_configure_metadata(ns, id); + if (ret < 0) { + blk_mq_unfreeze_queue(ns->disk->queue); + goto out; + } nvme_set_chunk_sectors(ns, id); nvme_update_disk_info(ns->disk, ns, id); From c7ca9757bda35ff9ce27ab42f2cb8b84d983e6ad Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 16 Nov 2023 13:14:35 +0100 Subject: [PATCH 084/234] nvme: blank out authentication fabrics options if not configured If the config option NVME_HOST_AUTH is not selected we should not accept the corresponding fabrics options. This allows userspace to detect if NVMe authentication has been enabled for the kernel. Cc: Shin'ichiro Kawasaki Fixes: f50fff73d620 ("nvme: implement In-Band authentication") Signed-off-by: Hannes Reinecke Tested-by: Shin'ichiro Kawasaki Reviewed-by: Daniel Wagner Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/fabrics.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 4673ead69c5f..aa88606a44c4 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -667,8 +667,10 @@ static const match_table_t opt_tokens = { #endif { NVMF_OPT_FAIL_FAST_TMO, "fast_io_fail_tmo=%d" }, { NVMF_OPT_DISCOVERY, "discovery" }, +#ifdef CONFIG_NVME_HOST_AUTH { NVMF_OPT_DHCHAP_SECRET, "dhchap_secret=%s" }, { NVMF_OPT_DHCHAP_CTRL_SECRET, "dhchap_ctrl_secret=%s" }, +#endif #ifdef CONFIG_NVME_TCP_TLS { NVMF_OPT_TLS, "tls" }, #endif From 1c22e0295a5eb571c27b53c7371f95699ef705ff Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Nov 2023 08:13:36 -0500 Subject: [PATCH 085/234] nvmet: nul-terminate the NQNs passed in the connect command The host and subsystem NQNs are passed in the connect command payload and interpreted as nul-terminated strings. Ensure they actually are nul-terminated before using them. Fixes: a07b4970f464 "nvmet: add a generic NVMe target") Reported-by: Alon Zahavi Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/fabrics-cmd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 43b5bd8bb6a5..d8da840a1c0e 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -244,6 +244,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) goto out; } + d->subsysnqn[NVMF_NQN_FIELD_LEN - 1] = '\0'; + d->hostnqn[NVMF_NQN_FIELD_LEN - 1] = '\0'; status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req, le32_to_cpu(c->kato), &ctrl); if (status) @@ -313,6 +315,8 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) goto out; } + d->subsysnqn[NVMF_NQN_FIELD_LEN - 1] = '\0'; + d->hostnqn[NVMF_NQN_FIELD_LEN - 1] = '\0'; ctrl = nvmet_ctrl_find_get(d->subsysnqn, d->hostnqn, le16_to_cpu(d->cntlid), req); if (!ctrl) { From 11b9d0b4999705105d1fb7f0e8ac969e0f41b1b8 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 20 Oct 2023 07:06:06 +0200 Subject: [PATCH 086/234] nvmet-tcp: always initialize tls_handshake_tmo_work The TLS handshake timeout work item should always be initialized to avoid a crash when cancelling the workqueue. Fixes: 675b453e0241 ("nvmet-tcp: enable TLS handshake upcall") Suggested-by: Maurizio Lombardi Signed-off-by: Hannes Reinecke Tested-by: Shin'ichiro Kawasaki Tested-by: Yi Zhang Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 92b74d0b8686..4cc27856aa8f 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1854,6 +1854,8 @@ static int nvmet_tcp_tls_handshake(struct nvmet_tcp_queue *queue) } return ret; } +#else +static void nvmet_tcp_tls_handshake_timeout(struct work_struct *w) {} #endif static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, @@ -1911,9 +1913,9 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, list_add_tail(&queue->queue_list, &nvmet_tcp_queue_list); mutex_unlock(&nvmet_tcp_queue_mutex); -#ifdef CONFIG_NVME_TARGET_TCP_TLS INIT_DELAYED_WORK(&queue->tls_handshake_tmo_work, nvmet_tcp_tls_handshake_timeout); +#ifdef CONFIG_NVME_TARGET_TCP_TLS if (queue->state == NVMET_TCP_Q_TLS_HANDSHAKE) { struct sock *sk = queue->sock->sk; From 53f2bca2609237f910531f2c1a7779b16ce7952d Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Mon, 20 Nov 2023 03:25:21 +0000 Subject: [PATCH 087/234] block/null_blk: Fix double blk_mq_start_request() warning When CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION is enabled, null_queue_rq() would return BLK_STS_RESOURCE or BLK_STS_DEV_RESOURCE for the request, which has been marked as MQ_RQ_IN_FLIGHT by blk_mq_start_request(). Then null_queue_rqs() put these requests in the rqlist, return back to the block layer core, which would try to queue them individually again, so the warning in blk_mq_start_request() triggered. Fix it by splitting the null_queue_rq() into two parts: the first is the preparation of request, the second is the handling of request. We put the blk_mq_start_request() after the preparation part, which may fail and return back to the block layer core. The throttling also belongs to the preparation part, so move it before blk_mq_start_request(). And change the return type of null_handle_cmd() to void, since it always return BLK_STS_OK now. Reported-by: Closes: https://lore.kernel.org/all/0000000000000e6aac06098aee0c@google.com/ Fixes: d78bfa1346ab ("block/null_blk: add queue_rqs() support") Suggested-by: Bart Van Assche Signed-off-by: Chengming Zhou Link: https://lore.kernel.org/r/20231120032521.1012037-1-chengming.zhou@linux.dev Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 22a3cf7f32e2..3021d58ca51c 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1464,19 +1464,13 @@ blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op, return BLK_STS_OK; } -static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector, - sector_t nr_sectors, enum req_op op) +static void null_handle_cmd(struct nullb_cmd *cmd, sector_t sector, + sector_t nr_sectors, enum req_op op) { struct nullb_device *dev = cmd->nq->dev; struct nullb *nullb = dev->nullb; blk_status_t sts; - if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) { - sts = null_handle_throttled(cmd); - if (sts != BLK_STS_OK) - return sts; - } - if (op == REQ_OP_FLUSH) { cmd->error = errno_to_blk_status(null_handle_flush(nullb)); goto out; @@ -1493,7 +1487,6 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector, out: nullb_complete_cmd(cmd); - return BLK_STS_OK; } static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer) @@ -1724,8 +1717,6 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, cmd->fake_timeout = should_timeout_request(rq) || blk_should_fake_timeout(rq->q); - blk_mq_start_request(rq); - if (should_requeue_request(rq)) { /* * Alternate between hitting the core BUSY path, and the @@ -1738,6 +1729,15 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } + if (test_bit(NULLB_DEV_FL_THROTTLED, &nq->dev->flags)) { + blk_status_t sts = null_handle_throttled(cmd); + + if (sts != BLK_STS_OK) + return sts; + } + + blk_mq_start_request(rq); + if (is_poll) { spin_lock(&nq->poll_lock); list_add_tail(&rq->queuelist, &nq->poll_list); @@ -1747,7 +1747,8 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, if (cmd->fake_timeout) return BLK_STS_OK; - return null_handle_cmd(cmd, sector, nr_sectors, req_op(rq)); + null_handle_cmd(cmd, sector, nr_sectors, req_op(rq)); + return BLK_STS_OK; } static void null_queue_rqs(struct request **rqlist) From 79e0c5be8c73a674c92bd4ba77b75f4f8c91d32e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 Nov 2023 01:42:13 +0100 Subject: [PATCH 088/234] net, vrf: Move dstats structure to core Just move struct pcpu_dstats out of the vrf into the core, and streamline the field names slightly, so they better align with the {t,l}stats ones. No functional change otherwise. A conversion of the u64s to u64_stats_t could be done at a separate point in future. This move is needed as we are moving the {t,l,d}stats allocation/freeing to the core. Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Cc: Jakub Kicinski Cc: David Ahern Link: https://lore.kernel.org/r/20231114004220.6495-2-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- drivers/net/vrf.c | 24 +++++++----------------- include/linux/netdevice.h | 10 ++++++++++ 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index db766941b78f..3e6e0fdc3ba7 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -121,22 +121,12 @@ struct net_vrf { int ifindex; }; -struct pcpu_dstats { - u64 tx_pkts; - u64 tx_bytes; - u64 tx_drps; - u64 rx_pkts; - u64 rx_bytes; - u64 rx_drps; - struct u64_stats_sync syncp; -}; - static void vrf_rx_stats(struct net_device *dev, int len) { struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); u64_stats_update_begin(&dstats->syncp); - dstats->rx_pkts++; + dstats->rx_packets++; dstats->rx_bytes += len; u64_stats_update_end(&dstats->syncp); } @@ -161,10 +151,10 @@ static void vrf_get_stats64(struct net_device *dev, do { start = u64_stats_fetch_begin(&dstats->syncp); tbytes = dstats->tx_bytes; - tpkts = dstats->tx_pkts; - tdrops = dstats->tx_drps; + tpkts = dstats->tx_packets; + tdrops = dstats->tx_drops; rbytes = dstats->rx_bytes; - rpkts = dstats->rx_pkts; + rpkts = dstats->rx_packets; } while (u64_stats_fetch_retry(&dstats->syncp, start)); stats->tx_bytes += tbytes; stats->tx_packets += tpkts; @@ -421,7 +411,7 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) vrf_rx_stats(dev, len); else - this_cpu_inc(dev->dstats->rx_drps); + this_cpu_inc(dev->dstats->rx_drops); return NETDEV_TX_OK; } @@ -616,11 +606,11 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); u64_stats_update_begin(&dstats->syncp); - dstats->tx_pkts++; + dstats->tx_packets++; dstats->tx_bytes += len; u64_stats_update_end(&dstats->syncp); } else { - this_cpu_inc(dev->dstats->tx_drps); + this_cpu_inc(dev->dstats->tx_drops); } return ret; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a16c9cc063fe..98082113156e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2755,6 +2755,16 @@ struct pcpu_sw_netstats { struct u64_stats_sync syncp; } __aligned(4 * sizeof(u64)); +struct pcpu_dstats { + u64 rx_packets; + u64 rx_bytes; + u64 rx_drops; + u64 tx_packets; + u64 tx_bytes; + u64 tx_drops; + struct u64_stats_sync syncp; +} __aligned(8 * sizeof(u64)); + struct pcpu_lstats { u64_stats_t packets; u64_stats_t bytes; From 34d21de99cea9cb17967874313e5b0262527833c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 Nov 2023 01:42:14 +0100 Subject: [PATCH 089/234] net: Move {l,t,d}stats allocation to core and convert veth & vrf Move {l,t,d}stats allocation to the core and let netdevs pick the stats type they need. That way the driver doesn't have to bother with error handling (allocation failure checking, making sure free happens in the right spot, etc) - all happening in the core. Co-developed-by: Jakub Kicinski Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Cc: David Ahern Link: https://lore.kernel.org/r/20231114004220.6495-3-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- drivers/net/veth.c | 16 ++----------- drivers/net/vrf.c | 14 +++-------- include/linux/netdevice.h | 20 ++++++++++++---- net/core/dev.c | 49 ++++++++++++++++++++++++++++++++++++++- 4 files changed, 69 insertions(+), 30 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 9980517ed8b0..ac030c241d1a 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1506,25 +1506,12 @@ static void veth_free_queues(struct net_device *dev) static int veth_dev_init(struct net_device *dev) { - int err; - - dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); - if (!dev->lstats) - return -ENOMEM; - - err = veth_alloc_queues(dev); - if (err) { - free_percpu(dev->lstats); - return err; - } - - return 0; + return veth_alloc_queues(dev); } static void veth_dev_free(struct net_device *dev) { veth_free_queues(dev); - free_percpu(dev->lstats); } #ifdef CONFIG_NET_POLL_CONTROLLER @@ -1796,6 +1783,7 @@ static void veth_setup(struct net_device *dev) NETIF_F_HW_VLAN_STAG_RX); dev->needs_free_netdev = true; dev->priv_destructor = veth_dev_free; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS; dev->max_mtu = ETH_MAX_MTU; dev->hw_features = VETH_FEATURES; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 3e6e0fdc3ba7..bb95ce43cd97 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1164,22 +1164,15 @@ static void vrf_dev_uninit(struct net_device *dev) vrf_rtable_release(dev, vrf); vrf_rt6_release(dev, vrf); - - free_percpu(dev->dstats); - dev->dstats = NULL; } static int vrf_dev_init(struct net_device *dev) { struct net_vrf *vrf = netdev_priv(dev); - dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats); - if (!dev->dstats) - goto out_nomem; - /* create the default dst which points back to us */ if (vrf_rtable_create(dev) != 0) - goto out_stats; + goto out_nomem; if (vrf_rt6_create(dev) != 0) goto out_rth; @@ -1193,9 +1186,6 @@ static int vrf_dev_init(struct net_device *dev) out_rth: vrf_rtable_release(dev, vrf); -out_stats: - free_percpu(dev->dstats); - dev->dstats = NULL; out_nomem: return -ENOMEM; } @@ -1694,6 +1684,8 @@ static void vrf_setup(struct net_device *dev) dev->min_mtu = IPV6_MIN_MTU; dev->max_mtu = IP6_MAX_MTU; dev->mtu = dev->max_mtu; + + dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; } static int vrf_validate(struct nlattr *tb[], struct nlattr *data[], diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 98082113156e..2564e209465e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1797,6 +1797,13 @@ enum netdev_ml_priv_type { ML_PRIV_CAN, }; +enum netdev_stat_type { + NETDEV_PCPU_STAT_NONE, + NETDEV_PCPU_STAT_LSTATS, /* struct pcpu_lstats */ + NETDEV_PCPU_STAT_TSTATS, /* struct pcpu_sw_netstats */ + NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */ +}; + /** * struct net_device - The DEVICE structure. * @@ -1991,10 +1998,14 @@ enum netdev_ml_priv_type { * * @ml_priv: Mid-layer private * @ml_priv_type: Mid-layer private type - * @lstats: Loopback statistics - * @tstats: Tunnel statistics - * @dstats: Dummy statistics - * @vstats: Virtual ethernet statistics + * + * @pcpu_stat_type: Type of device statistics which the core should + * allocate/free: none, lstats, tstats, dstats. none + * means the driver is handling statistics allocation/ + * freeing internally. + * @lstats: Loopback statistics: packets, bytes + * @tstats: Tunnel statistics: RX/TX packets, RX/TX bytes + * @dstats: Dummy statistics: RX/TX/drop packets, RX/TX bytes * * @garp_port: GARP * @mrp_port: MRP @@ -2354,6 +2365,7 @@ struct net_device { void *ml_priv; enum netdev_ml_priv_type ml_priv_type; + enum netdev_stat_type pcpu_stat_type:8; union { struct pcpu_lstats __percpu *lstats; struct pcpu_sw_netstats __percpu *tstats; diff --git a/net/core/dev.c b/net/core/dev.c index af53f6d838ce..0cc6e283edba 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10051,6 +10051,46 @@ void netif_tx_stop_all_queues(struct net_device *dev) } EXPORT_SYMBOL(netif_tx_stop_all_queues); +static int netdev_do_alloc_pcpu_stats(struct net_device *dev) +{ + void __percpu *v; + + switch (dev->pcpu_stat_type) { + case NETDEV_PCPU_STAT_NONE: + return 0; + case NETDEV_PCPU_STAT_LSTATS: + v = dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); + break; + case NETDEV_PCPU_STAT_TSTATS: + v = dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + break; + case NETDEV_PCPU_STAT_DSTATS: + v = dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats); + break; + default: + return -EINVAL; + } + + return v ? 0 : -ENOMEM; +} + +static void netdev_do_free_pcpu_stats(struct net_device *dev) +{ + switch (dev->pcpu_stat_type) { + case NETDEV_PCPU_STAT_NONE: + return; + case NETDEV_PCPU_STAT_LSTATS: + free_percpu(dev->lstats); + break; + case NETDEV_PCPU_STAT_TSTATS: + free_percpu(dev->tstats); + break; + case NETDEV_PCPU_STAT_DSTATS: + free_percpu(dev->dstats); + break; + } +} + /** * register_netdevice() - register a network device * @dev: device to register @@ -10111,9 +10151,13 @@ int register_netdevice(struct net_device *dev) goto err_uninit; } + ret = netdev_do_alloc_pcpu_stats(dev); + if (ret) + goto err_uninit; + ret = dev_index_reserve(net, dev->ifindex); if (ret < 0) - goto err_uninit; + goto err_free_pcpu; dev->ifindex = ret; /* Transfer changeable features to wanted_features and enable @@ -10219,6 +10263,8 @@ err_uninit_notify: call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev); err_ifindex_release: dev_index_release(net, dev->ifindex); +err_free_pcpu: + netdev_do_free_pcpu_stats(dev); err_uninit: if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); @@ -10471,6 +10517,7 @@ void netdev_run_todo(void) WARN_ON(rcu_access_pointer(dev->ip_ptr)); WARN_ON(rcu_access_pointer(dev->ip6_ptr)); + netdev_do_free_pcpu_stats(dev); if (dev->priv_destructor) dev->priv_destructor(dev); if (dev->needs_free_netdev) From ae1658272c6491a31ac968e39882fc569f312ac3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 Nov 2023 01:42:15 +0100 Subject: [PATCH 090/234] netkit: Add tstats per-CPU traffic counters Add dev->tstats traffic accounting to netkit. The latter contains per-CPU RX and TX counters. The dev's TX counters are bumped upon pass/unspec as well as redirect verdicts, in other words, on everything except for drops. The dev's RX counters are bumped upon successful __netif_rx(), as well as from skb_do_redirect() (not part of this commit here). Using dev->lstats with having just a single packets/bytes counter and inferring one another's RX counters from the peer dev's lstats is not possible given skb_do_redirect() can also bump the device's stats. Signed-off-by: Daniel Borkmann Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20231114004220.6495-4-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- drivers/net/netkit.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c index 5a0f86f38f09..99de11f9cde5 100644 --- a/drivers/net/netkit.c +++ b/drivers/net/netkit.c @@ -68,6 +68,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev) netdev_tx_t ret_dev = NET_XMIT_SUCCESS; const struct bpf_mprog_entry *entry; struct net_device *peer; + int len = skb->len; rcu_read_lock(); peer = rcu_dereference(nk->peer); @@ -85,15 +86,22 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev) case NETKIT_PASS: skb->protocol = eth_type_trans(skb, skb->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); - __netif_rx(skb); + if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) { + dev_sw_netstats_tx_add(dev, 1, len); + dev_sw_netstats_rx_add(peer, len); + } else { + goto drop_stats; + } break; case NETKIT_REDIRECT: + dev_sw_netstats_tx_add(dev, 1, len); skb_do_redirect(skb); break; case NETKIT_DROP: default: drop: kfree_skb(skb); +drop_stats: dev_core_stats_tx_dropped_inc(dev); ret_dev = NET_XMIT_DROP; break; @@ -174,6 +182,13 @@ static struct net_device *netkit_peer_dev(struct net_device *dev) return rcu_dereference(netkit_priv(dev)->peer); } +static void netkit_get_stats(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + dev_fetch_sw_netstats(stats, dev->tstats); + stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped); +} + static void netkit_uninit(struct net_device *dev); static const struct net_device_ops netkit_netdev_ops = { @@ -184,6 +199,7 @@ static const struct net_device_ops netkit_netdev_ops = { .ndo_set_rx_headroom = netkit_set_headroom, .ndo_get_iflink = netkit_get_iflink, .ndo_get_peer_dev = netkit_peer_dev, + .ndo_get_stats64 = netkit_get_stats, .ndo_uninit = netkit_uninit, .ndo_features_check = passthru_features_check, }; @@ -218,6 +234,7 @@ static void netkit_setup(struct net_device *dev) ether_setup(dev); dev->max_mtu = ETH_MAX_MTU; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; dev->flags |= IFF_NOARP; dev->priv_flags &= ~IFF_TX_SKB_SHARING; From 6f2684bf2b4460c84d0d34612a939f78b96b03fc Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Tue, 14 Nov 2023 01:42:16 +0100 Subject: [PATCH 091/234] veth: Use tstats per-CPU traffic counters Currently veth devices use the lstats per-CPU traffic counters, which only cover TX traffic. veth_get_stats64() actually populates RX stats of a veth device from its peer's TX counters, based on the assumption that a veth device can _only_ receive packets from its peer, which is no longer true: For example, recent CNIs (like Cilium) can use the bpf_redirect_peer() BPF helper to redirect traffic from NIC's tc ingress to veth's tc ingress (in a different netns), skipping veth's peer device. Unfortunately, this kind of traffic isn't currently accounted for in veth's RX stats. In preparation for the fix, use tstats (instead of lstats) to maintain both RX and TX counters for each veth device. We'll use RX counters for bpf_redirect_peer() traffic, and keep using TX counters for the usual "peer-to-peer" traffic. In veth_get_stats64(), calculate RX stats by _adding_ RX count to peer's TX count, in order to cover both kinds of traffic. veth_stats_rx() might need a name change (perhaps to "veth_stats_xdp()") for less confusion, but let's leave it to another patch to keep the fix minimal. Signed-off-by: Peilin Ye Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20231114004220.6495-5-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- drivers/net/veth.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index ac030c241d1a..6cc352296c67 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -373,7 +373,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) skb_tx_timestamp(skb); if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) { if (!use_napi) - dev_lstats_add(dev, length); + dev_sw_netstats_tx_add(dev, 1, length); else __veth_xdp_flush(rq); } else { @@ -387,14 +387,6 @@ drop: return ret; } -static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes) -{ - struct veth_priv *priv = netdev_priv(dev); - - dev_lstats_read(dev, packets, bytes); - return atomic64_read(&priv->dropped); -} - static void veth_stats_rx(struct veth_stats *result, struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); @@ -432,24 +424,24 @@ static void veth_get_stats64(struct net_device *dev, struct veth_priv *priv = netdev_priv(dev); struct net_device *peer; struct veth_stats rx; - u64 packets, bytes; - tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes); - tot->tx_bytes = bytes; - tot->tx_packets = packets; + tot->tx_dropped = atomic64_read(&priv->dropped); + dev_fetch_sw_netstats(tot, dev->tstats); veth_stats_rx(&rx, dev); tot->tx_dropped += rx.xdp_tx_err; tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err; - tot->rx_bytes = rx.xdp_bytes; - tot->rx_packets = rx.xdp_packets; + tot->rx_bytes += rx.xdp_bytes; + tot->rx_packets += rx.xdp_packets; rcu_read_lock(); peer = rcu_dereference(priv->peer); if (peer) { - veth_stats_tx(peer, &packets, &bytes); - tot->rx_bytes += bytes; - tot->rx_packets += packets; + struct rtnl_link_stats64 tot_peer = {}; + + dev_fetch_sw_netstats(&tot_peer, peer->tstats); + tot->rx_bytes += tot_peer.tx_bytes; + tot->rx_packets += tot_peer.tx_packets; veth_stats_rx(&rx, peer); tot->tx_dropped += rx.peer_tq_xdp_xmit_err; @@ -1783,7 +1775,7 @@ static void veth_setup(struct net_device *dev) NETIF_F_HW_VLAN_STAG_RX); dev->needs_free_netdev = true; dev->priv_destructor = veth_dev_free; - dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; dev->max_mtu = ETH_MAX_MTU; dev->hw_features = VETH_FEATURES; From 024ee930cb3c9ae49e4266aee89cfde0ebb407e1 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Tue, 14 Nov 2023 01:42:17 +0100 Subject: [PATCH 092/234] bpf: Fix dev's rx stats for bpf_redirect_peer traffic Traffic redirected by bpf_redirect_peer() (used by recent CNIs like Cilium) is not accounted for in the RX stats of supported devices (that is, veth and netkit), confusing user space metrics collectors such as cAdvisor [0], as reported by Youlun. Fix it by calling dev_sw_netstats_rx_add() in skb_do_redirect(), to update RX traffic counters. Devices that support ndo_get_peer_dev _must_ use the @tstats per-CPU counters (instead of @lstats, or @dstats). To make this more fool-proof, error out when ndo_get_peer_dev is set but @tstats are not selected. [0] Specifically, the "container_network_receive_{byte,packet}s_total" counters are affected. Fixes: 9aa1206e8f48 ("bpf: Add redirect_peer helper") Reported-by: Youlun Zhang Signed-off-by: Peilin Ye Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20231114004220.6495-6-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- net/core/dev.c | 8 ++++++++ net/core/filter.c | 1 + 2 files changed, 9 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 0cc6e283edba..c879246be48d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10055,6 +10055,14 @@ static int netdev_do_alloc_pcpu_stats(struct net_device *dev) { void __percpu *v; + /* Drivers implementing ndo_get_peer_dev must support tstat + * accounting, so that skb_do_redirect() can bump the dev's + * RX stats upon network namespace switch. + */ + if (dev->netdev_ops->ndo_get_peer_dev && + dev->pcpu_stat_type != NETDEV_PCPU_STAT_TSTATS) + return -EOPNOTSUPP; + switch (dev->pcpu_stat_type) { case NETDEV_PCPU_STAT_NONE: return 0; diff --git a/net/core/filter.c b/net/core/filter.c index 383f96b0a1c7..cca810987c8d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2492,6 +2492,7 @@ int skb_do_redirect(struct sk_buff *skb) net_eq(net, dev_net(dev)))) goto out_drop; skb->dev = dev; + dev_sw_netstats_rx_add(dev, skb->len); return -EAGAIN; } return flags & BPF_F_NEIGH ? From 2c225425704078282e152ba692649237f78b3d7a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 Nov 2023 01:42:18 +0100 Subject: [PATCH 093/234] bpf, netkit: Add indirect call wrapper for fetching peer dev ndo_get_peer_dev is used in tcx BPF fast path, therefore make use of indirect call wrapper and therefore optimize the bpf_redirect_peer() internal handling a bit. Add a small skb_get_peer_dev() wrapper which utilizes the INDIRECT_CALL_1() macro instead of open coding. Future work could potentially add a peer pointer directly into struct net_device in future and convert veth and netkit over to use it so that eventually ndo_get_peer_dev can be removed. Co-developed-by: Nikolay Aleksandrov Signed-off-by: Nikolay Aleksandrov Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231114004220.6495-7-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- drivers/net/netkit.c | 3 ++- include/net/netkit.h | 6 ++++++ net/core/filter.c | 18 +++++++++++++----- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c index 99de11f9cde5..97bd6705c241 100644 --- a/drivers/net/netkit.c +++ b/drivers/net/netkit.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -177,7 +178,7 @@ out: rcu_read_unlock(); } -static struct net_device *netkit_peer_dev(struct net_device *dev) +INDIRECT_CALLABLE_SCOPE struct net_device *netkit_peer_dev(struct net_device *dev) { return rcu_dereference(netkit_priv(dev)->peer); } diff --git a/include/net/netkit.h b/include/net/netkit.h index 0ba2e6b847ca..9ec0163739f4 100644 --- a/include/net/netkit.h +++ b/include/net/netkit.h @@ -10,6 +10,7 @@ int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); int netkit_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int netkit_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog); int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); +INDIRECT_CALLABLE_DECLARE(struct net_device *netkit_peer_dev(struct net_device *dev)); #else static inline int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) @@ -34,5 +35,10 @@ static inline int netkit_prog_query(const union bpf_attr *attr, { return -EINVAL; } + +static inline struct net_device *netkit_peer_dev(struct net_device *dev) +{ + return NULL; +} #endif /* CONFIG_NETKIT */ #endif /* __NET_NETKIT_H */ diff --git a/net/core/filter.c b/net/core/filter.c index cca810987c8d..7e4d7c3bcc84 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -81,6 +81,7 @@ #include #include #include +#include #include #include "dev.h" @@ -2468,6 +2469,16 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = { DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info); +static struct net_device *skb_get_peer_dev(struct net_device *dev) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (likely(ops->ndo_get_peer_dev)) + return INDIRECT_CALL_1(ops->ndo_get_peer_dev, + netkit_peer_dev, dev); + return NULL; +} + int skb_do_redirect(struct sk_buff *skb) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); @@ -2481,12 +2492,9 @@ int skb_do_redirect(struct sk_buff *skb) if (unlikely(!dev)) goto out_drop; if (flags & BPF_F_PEER) { - const struct net_device_ops *ops = dev->netdev_ops; - - if (unlikely(!ops->ndo_get_peer_dev || - !skb_at_tc_ingress(skb))) + if (unlikely(!skb_at_tc_ingress(skb))) goto out_drop; - dev = ops->ndo_get_peer_dev(dev); + dev = skb_get_peer_dev(dev); if (unlikely(!dev || !(dev->flags & IFF_UP) || net_eq(net, dev_net(dev)))) From eee82da79f036bb49ff80d3088b9530e3c2e57eb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 Nov 2023 01:42:19 +0100 Subject: [PATCH 094/234] selftests/bpf: De-veth-ize the tc_redirect test case No functional changes to the test case, but just renaming various functions, variables, etc, to remove veth part of their name for making it more generic and reusable later on (e.g. for netkit). Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Reviewed-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20231114004220.6495-8-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/prog_tests/tc_redirect.c | 263 +++++++++--------- 1 file changed, 137 insertions(+), 126 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 6ee22c3b251a..407ff4e9bc78 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -110,11 +110,16 @@ static void netns_setup_namespaces_nofail(const char *verb) } } +enum dev_mode { + MODE_VETH, +}; + struct netns_setup_result { - int ifindex_veth_src; - int ifindex_veth_src_fwd; - int ifindex_veth_dst; - int ifindex_veth_dst_fwd; + enum dev_mode dev_mode; + int ifindex_src; + int ifindex_src_fwd; + int ifindex_dst; + int ifindex_dst_fwd; }; static int get_ifaddr(const char *name, char *ifaddr) @@ -140,55 +145,59 @@ static int get_ifaddr(const char *name, char *ifaddr) static int netns_setup_links_and_routes(struct netns_setup_result *result) { struct nstoken *nstoken = NULL; - char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {}; + char src_fwd_addr[IFADDR_STR_LEN+1] = {}; - SYS(fail, "ip link add veth_src type veth peer name veth_src_fwd"); - SYS(fail, "ip link add veth_dst type veth peer name veth_dst_fwd"); + if (result->dev_mode == MODE_VETH) { + SYS(fail, "ip link add src type veth peer name src_fwd"); + SYS(fail, "ip link add dst type veth peer name dst_fwd"); - SYS(fail, "ip link set veth_dst_fwd address " MAC_DST_FWD); - SYS(fail, "ip link set veth_dst address " MAC_DST); + SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD); + SYS(fail, "ip link set dst address " MAC_DST); + } - if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr)) + if (get_ifaddr("src_fwd", src_fwd_addr)) goto fail; - result->ifindex_veth_src = if_nametoindex("veth_src"); - if (!ASSERT_GT(result->ifindex_veth_src, 0, "ifindex_veth_src")) + result->ifindex_src = if_nametoindex("src"); + if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src")) goto fail; - result->ifindex_veth_src_fwd = if_nametoindex("veth_src_fwd"); - if (!ASSERT_GT(result->ifindex_veth_src_fwd, 0, "ifindex_veth_src_fwd")) + result->ifindex_src_fwd = if_nametoindex("src_fwd"); + if (!ASSERT_GT(result->ifindex_src_fwd, 0, "ifindex_src_fwd")) goto fail; - result->ifindex_veth_dst = if_nametoindex("veth_dst"); - if (!ASSERT_GT(result->ifindex_veth_dst, 0, "ifindex_veth_dst")) + result->ifindex_dst = if_nametoindex("dst"); + if (!ASSERT_GT(result->ifindex_dst, 0, "ifindex_dst")) goto fail; - result->ifindex_veth_dst_fwd = if_nametoindex("veth_dst_fwd"); - if (!ASSERT_GT(result->ifindex_veth_dst_fwd, 0, "ifindex_veth_dst_fwd")) + result->ifindex_dst_fwd = if_nametoindex("dst_fwd"); + if (!ASSERT_GT(result->ifindex_dst_fwd, 0, "ifindex_dst_fwd")) goto fail; - SYS(fail, "ip link set veth_src netns " NS_SRC); - SYS(fail, "ip link set veth_src_fwd netns " NS_FWD); - SYS(fail, "ip link set veth_dst_fwd netns " NS_FWD); - SYS(fail, "ip link set veth_dst netns " NS_DST); + SYS(fail, "ip link set src netns " NS_SRC); + SYS(fail, "ip link set src_fwd netns " NS_FWD); + SYS(fail, "ip link set dst_fwd netns " NS_FWD); + SYS(fail, "ip link set dst netns " NS_DST); /** setup in 'src' namespace */ nstoken = open_netns(NS_SRC); if (!ASSERT_OK_PTR(nstoken, "setns src")) goto fail; - SYS(fail, "ip addr add " IP4_SRC "/32 dev veth_src"); - SYS(fail, "ip addr add " IP6_SRC "/128 dev veth_src nodad"); - SYS(fail, "ip link set dev veth_src up"); + SYS(fail, "ip addr add " IP4_SRC "/32 dev src"); + SYS(fail, "ip addr add " IP6_SRC "/128 dev src nodad"); + SYS(fail, "ip link set dev src up"); - SYS(fail, "ip route add " IP4_DST "/32 dev veth_src scope global"); - SYS(fail, "ip route add " IP4_NET "/16 dev veth_src scope global"); - SYS(fail, "ip route add " IP6_DST "/128 dev veth_src scope global"); + SYS(fail, "ip route add " IP4_DST "/32 dev src scope global"); + SYS(fail, "ip route add " IP4_NET "/16 dev src scope global"); + SYS(fail, "ip route add " IP6_DST "/128 dev src scope global"); - SYS(fail, "ip neigh add " IP4_DST " dev veth_src lladdr %s", - veth_src_fwd_addr); - SYS(fail, "ip neigh add " IP6_DST " dev veth_src lladdr %s", - veth_src_fwd_addr); + if (result->dev_mode == MODE_VETH) { + SYS(fail, "ip neigh add " IP4_DST " dev src lladdr %s", + src_fwd_addr); + SYS(fail, "ip neigh add " IP6_DST " dev src lladdr %s", + src_fwd_addr); + } close_netns(nstoken); @@ -201,15 +210,15 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) * needs v4 one in order to start ARP probing. IP4_NET route is added * to the endpoints so that the ARP processing will reply. */ - SYS(fail, "ip addr add " IP4_SLL "/32 dev veth_src_fwd"); - SYS(fail, "ip addr add " IP4_DLL "/32 dev veth_dst_fwd"); - SYS(fail, "ip link set dev veth_src_fwd up"); - SYS(fail, "ip link set dev veth_dst_fwd up"); + SYS(fail, "ip addr add " IP4_SLL "/32 dev src_fwd"); + SYS(fail, "ip addr add " IP4_DLL "/32 dev dst_fwd"); + SYS(fail, "ip link set dev src_fwd up"); + SYS(fail, "ip link set dev dst_fwd up"); - SYS(fail, "ip route add " IP4_SRC "/32 dev veth_src_fwd scope global"); - SYS(fail, "ip route add " IP6_SRC "/128 dev veth_src_fwd scope global"); - SYS(fail, "ip route add " IP4_DST "/32 dev veth_dst_fwd scope global"); - SYS(fail, "ip route add " IP6_DST "/128 dev veth_dst_fwd scope global"); + SYS(fail, "ip route add " IP4_SRC "/32 dev src_fwd scope global"); + SYS(fail, "ip route add " IP6_SRC "/128 dev src_fwd scope global"); + SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global"); + SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global"); close_netns(nstoken); @@ -218,16 +227,18 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) if (!ASSERT_OK_PTR(nstoken, "setns dst")) goto fail; - SYS(fail, "ip addr add " IP4_DST "/32 dev veth_dst"); - SYS(fail, "ip addr add " IP6_DST "/128 dev veth_dst nodad"); - SYS(fail, "ip link set dev veth_dst up"); + SYS(fail, "ip addr add " IP4_DST "/32 dev dst"); + SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad"); + SYS(fail, "ip link set dev dst up"); - SYS(fail, "ip route add " IP4_SRC "/32 dev veth_dst scope global"); - SYS(fail, "ip route add " IP4_NET "/16 dev veth_dst scope global"); - SYS(fail, "ip route add " IP6_SRC "/128 dev veth_dst scope global"); + SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global"); + SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global"); + SYS(fail, "ip route add " IP6_SRC "/128 dev dst scope global"); - SYS(fail, "ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD); - SYS(fail, "ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD); + if (result->dev_mode == MODE_VETH) { + SYS(fail, "ip neigh add " IP4_SRC " dev dst lladdr " MAC_DST_FWD); + SYS(fail, "ip neigh add " IP6_SRC " dev dst lladdr " MAC_DST_FWD); + } close_netns(nstoken); @@ -293,23 +304,23 @@ static int netns_load_bpf(const struct bpf_program *src_prog, const struct bpf_program *chk_prog, const struct netns_setup_result *setup_result) { - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd); - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd); int err; - /* tc qdisc add dev veth_src_fwd clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd); - /* tc filter add dev veth_src_fwd ingress bpf da src_prog */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, src_prog, 0); - /* tc filter add dev veth_src_fwd egress bpf da chk_prog */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, chk_prog, 0); + /* tc qdisc add dev src_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd); + /* tc filter add dev src_fwd ingress bpf da src_prog */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, src_prog, 0); + /* tc filter add dev src_fwd egress bpf da chk_prog */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, chk_prog, 0); - /* tc qdisc add dev veth_dst_fwd clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); - /* tc filter add dev veth_dst_fwd ingress bpf da dst_prog */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, dst_prog, 0); - /* tc filter add dev veth_dst_fwd egress bpf da chk_prog */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, chk_prog, 0); + /* tc qdisc add dev dst_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd); + /* tc filter add dev dst_fwd ingress bpf da dst_prog */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, dst_prog, 0); + /* tc filter add dev dst_fwd egress bpf da chk_prog */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, chk_prog, 0); return 0; fail: @@ -539,10 +550,10 @@ done: static int netns_load_dtime_bpf(struct test_tc_dtime *skel, const struct netns_setup_result *setup_result) { - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd); - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src); - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst); + LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_src); + LIBBPF_OPTS(bpf_tc_hook, qdisc_dst); struct nstoken *nstoken; int err; @@ -550,58 +561,58 @@ static int netns_load_dtime_bpf(struct test_tc_dtime *skel, nstoken = open_netns(NS_SRC); if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC)) return -1; - /* tc qdisc add dev veth_src clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_src, setup_result->ifindex_veth_src); - /* tc filter add dev veth_src ingress bpf da ingress_host */ - XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0); - /* tc filter add dev veth_src egress bpf da egress_host */ - XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_EGRESS, skel->progs.egress_host, 0); + /* tc qdisc add dev src clsact */ + QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src); + /* tc filter add dev src ingress bpf da ingress_host */ + XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0); + /* tc filter add dev src egress bpf da egress_host */ + XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_EGRESS, skel->progs.egress_host, 0); close_netns(nstoken); /* setup ns_dst tc progs */ nstoken = open_netns(NS_DST); if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST)) return -1; - /* tc qdisc add dev veth_dst clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_dst, setup_result->ifindex_veth_dst); - /* tc filter add dev veth_dst ingress bpf da ingress_host */ - XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0); - /* tc filter add dev veth_dst egress bpf da egress_host */ - XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0); + /* tc qdisc add dev dst clsact */ + QDISC_CLSACT_CREATE(&qdisc_dst, setup_result->ifindex_dst); + /* tc filter add dev dst ingress bpf da ingress_host */ + XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0); + /* tc filter add dev dst egress bpf da egress_host */ + XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0); close_netns(nstoken); /* setup ns_fwd tc progs */ nstoken = open_netns(NS_FWD); if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD)) return -1; - /* tc qdisc add dev veth_dst_fwd clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); - /* tc filter add dev veth_dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, + /* tc qdisc add dev dst_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd); + /* tc filter add dev dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.ingress_fwdns_prio100, 100); - /* tc filter add dev veth_dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, + /* tc filter add dev dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.ingress_fwdns_prio101, 101); - /* tc filter add dev veth_dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, + /* tc filter add dev dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.egress_fwdns_prio100, 100); - /* tc filter add dev veth_dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, + /* tc filter add dev dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.egress_fwdns_prio101, 101); - /* tc qdisc add dev veth_src_fwd clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd); - /* tc filter add dev veth_src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, + /* tc qdisc add dev src_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd); + /* tc filter add dev src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, skel->progs.ingress_fwdns_prio100, 100); - /* tc filter add dev veth_src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, + /* tc filter add dev src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, skel->progs.ingress_fwdns_prio101, 101); - /* tc filter add dev veth_src_fwd egress prio 100 bpf da egress_fwdns_prio100 */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, + /* tc filter add dev src_fwd egress prio 100 bpf da egress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, skel->progs.egress_fwdns_prio100, 100); - /* tc filter add dev veth_src_fwd egress prio 101 bpf da egress_fwdns_prio101 */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, + /* tc filter add dev src_fwd egress prio 101 bpf da egress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, skel->progs.egress_fwdns_prio101, 101); close_netns(nstoken); return 0; @@ -777,8 +788,8 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result) if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open")) return; - skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; - skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; err = test_tc_dtime__load(skel); if (!ASSERT_OK(err, "test_tc_dtime__load")) @@ -868,8 +879,8 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result) if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open")) goto done; - skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; - skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; err = test_tc_neigh__load(skel); if (!ASSERT_OK(err, "test_tc_neigh__load")) @@ -904,8 +915,8 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result) if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) goto done; - skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; - skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; err = test_tc_peer__load(skel); if (!ASSERT_OK(err, "test_tc_peer__load")) @@ -996,7 +1007,7 @@ static int tun_relay_loop(int src_fd, int target_fd) static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) { LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd); - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd); struct test_tc_peer *skel = NULL; struct nstoken *nstoken = NULL; int err; @@ -1045,7 +1056,7 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) goto fail; skel->rodata->IFINDEX_SRC = ifindex; - skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; err = test_tc_peer__load(skel); if (!ASSERT_OK(err, "test_tc_peer__load")) @@ -1053,19 +1064,19 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) /* Load "tc_src_l3" to the tun_fwd interface to redirect packets * towards dst, and "tc_dst" to redirect packets - * and "tc_chk" on veth_dst_fwd to drop non-redirected packets. + * and "tc_chk" on dst_fwd to drop non-redirected packets. */ /* tc qdisc add dev tun_fwd clsact */ QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex); /* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */ XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0); - /* tc qdisc add dev veth_dst_fwd clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); - /* tc filter add dev veth_dst_fwd ingress bpf da tc_dst_l3 */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0); - /* tc filter add dev veth_dst_fwd egress bpf da tc_chk */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0); + /* tc qdisc add dev dst_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd); + /* tc filter add dev dst_fwd ingress bpf da tc_dst_l3 */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0); + /* tc filter add dev dst_fwd egress bpf da tc_chk */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0); /* Setup route and neigh tables */ SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24"); @@ -1074,17 +1085,17 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad"); SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad"); - SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global"); + SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev src scope global"); SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD " dev tun_src scope global"); - SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global"); - SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global"); + SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev dst scope global"); + SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev src scope global"); SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD " dev tun_src scope global"); - SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global"); + SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev dst scope global"); - SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); - SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev dst lladdr " MAC_DST_FWD); + SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev dst lladdr " MAC_DST_FWD); if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) goto fail; @@ -1106,9 +1117,9 @@ fail: close_netns(nstoken); } -#define RUN_TEST(name) \ +#define RUN_TEST(name, mode) \ ({ \ - struct netns_setup_result setup_result; \ + struct netns_setup_result setup_result = { .dev_mode = mode, }; \ if (test__start_subtest(#name)) \ if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \ if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \ @@ -1122,11 +1133,11 @@ static void *test_tc_redirect_run_tests(void *arg) { netns_setup_namespaces_nofail("delete"); - RUN_TEST(tc_redirect_peer); - RUN_TEST(tc_redirect_peer_l3); - RUN_TEST(tc_redirect_neigh); - RUN_TEST(tc_redirect_neigh_fib); - RUN_TEST(tc_redirect_dtime); + RUN_TEST(tc_redirect_peer, MODE_VETH); + RUN_TEST(tc_redirect_peer_l3, MODE_VETH); + RUN_TEST(tc_redirect_neigh, MODE_VETH); + RUN_TEST(tc_redirect_neigh_fib, MODE_VETH); + RUN_TEST(tc_redirect_dtime, MODE_VETH); return NULL; } From adfeae2d243d9e5b83d094af481d189156b11779 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 Nov 2023 01:42:20 +0100 Subject: [PATCH 095/234] selftests/bpf: Add netkit to tc_redirect selftest Extend the existing tc_redirect selftest to also cover netkit devices for exercising the bpf_redirect_peer() code paths, so that we have both veth as well as netkit covered, all tests still pass after this change. Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Reviewed-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20231114004220.6495-9-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/prog_tests/tc_redirect.c | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 407ff4e9bc78..518f143c5b0f 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -24,6 +24,7 @@ #include "test_progs.h" #include "network_helpers.h" +#include "netlink_helpers.h" #include "test_tc_neigh_fib.skel.h" #include "test_tc_neigh.skel.h" #include "test_tc_peer.skel.h" @@ -112,6 +113,7 @@ static void netns_setup_namespaces_nofail(const char *verb) enum dev_mode { MODE_VETH, + MODE_NETKIT, }; struct netns_setup_result { @@ -142,10 +144,51 @@ static int get_ifaddr(const char *name, char *ifaddr) return 0; } +static int create_netkit(int mode, char *prim, char *peer) +{ + struct rtattr *linkinfo, *data, *peer_info; + struct rtnl_handle rth = { .fd = -1 }; + const char *type = "netkit"; + struct { + struct nlmsghdr n; + struct ifinfomsg i; + char buf[1024]; + } req = {}; + int err; + + err = rtnl_open(&rth, 0); + if (!ASSERT_OK(err, "open_rtnetlink")) + return err; + + memset(&req, 0, sizeof(req)); + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; + req.n.nlmsg_type = RTM_NEWLINK; + req.i.ifi_family = AF_UNSPEC; + + addattr_l(&req.n, sizeof(req), IFLA_IFNAME, prim, strlen(prim)); + linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO); + addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type)); + data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA); + addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode); + peer_info = addattr_nest(&req.n, sizeof(req), IFLA_NETKIT_PEER_INFO); + req.n.nlmsg_len += sizeof(struct ifinfomsg); + addattr_l(&req.n, sizeof(req), IFLA_IFNAME, peer, strlen(peer)); + addattr_nest_end(&req.n, peer_info); + addattr_nest_end(&req.n, data); + addattr_nest_end(&req.n, linkinfo); + + err = rtnl_talk(&rth, &req.n, NULL); + ASSERT_OK(err, "talk_rtnetlink"); + rtnl_close(&rth); + return err; +} + static int netns_setup_links_and_routes(struct netns_setup_result *result) { struct nstoken *nstoken = NULL; char src_fwd_addr[IFADDR_STR_LEN+1] = {}; + int err; if (result->dev_mode == MODE_VETH) { SYS(fail, "ip link add src type veth peer name src_fwd"); @@ -153,6 +196,13 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD); SYS(fail, "ip link set dst address " MAC_DST); + } else if (result->dev_mode == MODE_NETKIT) { + err = create_netkit(NETKIT_L3, "src", "src_fwd"); + if (!ASSERT_OK(err, "create_ifindex_src")) + goto fail; + err = create_netkit(NETKIT_L3, "dst", "dst_fwd"); + if (!ASSERT_OK(err, "create_ifindex_dst")) + goto fail; } if (get_ifaddr("src_fwd", src_fwd_addr)) @@ -1134,7 +1184,9 @@ static void *test_tc_redirect_run_tests(void *arg) netns_setup_namespaces_nofail("delete"); RUN_TEST(tc_redirect_peer, MODE_VETH); + RUN_TEST(tc_redirect_peer, MODE_NETKIT); RUN_TEST(tc_redirect_peer_l3, MODE_VETH); + RUN_TEST(tc_redirect_peer_l3, MODE_NETKIT); RUN_TEST(tc_redirect_neigh, MODE_VETH); RUN_TEST(tc_redirect_neigh_fib, MODE_VETH); RUN_TEST(tc_redirect_dtime, MODE_VETH); From 5029c5e4f20d8d6b41cefbde4b3eeadaec4662c6 Mon Sep 17 00:00:00 2001 From: Muhammad Muzammil Date: Wed, 25 Oct 2023 15:24:36 +0200 Subject: [PATCH 096/234] s390/dasd: resolve spelling mistake resolve typing mistake from pimary to primary Signed-off-by: Muhammad Muzammil Link: https://lore.kernel.org/r/20231010043140.28416-1-m.muzzammilashraf@gmail.com Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20231025132437.1223363-2-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_int.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 2e663131adaf..1b1b8a41c4d4 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -283,7 +283,7 @@ struct dasd_pprc_dev_info { __u8 secondary; /* 7 Secondary device address */ __u16 pprc_id; /* 8-9 Peer-to-Peer Remote Copy ID */ __u8 reserved2[12]; /* 10-21 reserved */ - __u16 prim_cu_ssid; /* 22-23 Pimary Control Unit SSID */ + __u16 prim_cu_ssid; /* 22-23 Primary Control Unit SSID */ __u8 reserved3[12]; /* 24-35 reserved */ __u16 sec_cu_ssid; /* 36-37 Secondary Control Unit SSID */ __u8 reserved4[90]; /* 38-127 reserved */ From db46cd1e0426f52999d50fa72cfa97fa39952885 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B6ppner?= Date: Wed, 25 Oct 2023 15:24:37 +0200 Subject: [PATCH 097/234] s390/dasd: protect device queue against concurrent access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In dasd_profile_start() the amount of requests on the device queue are counted. The access to the device queue is unprotected against concurrent access. With a lot of parallel I/O, especially with alias devices enabled, the device queue can change while dasd_profile_start() is accessing the queue. In the worst case this leads to a kernel panic due to incorrect pointer accesses. Fix this by taking the device lock before accessing the queue and counting the requests. Additionally the check for a valid profile data pointer can be done earlier to avoid unnecessary locking in a hot path. Cc: Fixes: 4fa52aa7a82f ("[S390] dasd: add enhanced DASD statistics interface") Reviewed-by: Stefan Haberland Signed-off-by: Jan Höppner Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20231025132437.1223363-3-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index d440319a7945..833cfab7d877 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -676,18 +676,20 @@ static void dasd_profile_start(struct dasd_block *block, * we count each request only once. */ device = cqr->startdev; - if (device->profile.data) { - counter = 1; /* request is not yet queued on the start device */ - list_for_each(l, &device->ccw_queue) - if (++counter >= 31) - break; - } + if (!device->profile.data) + return; + + spin_lock(get_ccwdev_lock(device->cdev)); + counter = 1; /* request is not yet queued on the start device */ + list_for_each(l, &device->ccw_queue) + if (++counter >= 31) + break; + spin_unlock(get_ccwdev_lock(device->cdev)); + spin_lock(&device->profile.lock); - if (device->profile.data) { - device->profile.data->dasd_io_nr_req[counter]++; - if (rq_data_dir(req) == READ) - device->profile.data->dasd_read_nr_req[counter]++; - } + device->profile.data->dasd_io_nr_req[counter]++; + if (rq_data_dir(req) == READ) + device->profile.data->dasd_read_nr_req[counter]++; spin_unlock(&device->profile.lock); } From d6fef34ee4d102be448146f24caf96d7b4a05401 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 20 Nov 2023 14:18:31 -0800 Subject: [PATCH 098/234] io_uring: fix off-by one bvec index If the offset equals the bv_len of the first registered bvec, then the request does not include any of that first bvec. Skip it so that drivers don't have to deal with a zero length bvec, which was observed to break NVMe's PRP list creation. Cc: stable@vger.kernel.org Fixes: bd11b3a391e3 ("io_uring: don't use iov_iter_advance() for fixed buffers") Signed-off-by: Keith Busch Link: https://lore.kernel.org/r/20231120221831.2646460-1-kbusch@meta.com Signed-off-by: Jens Axboe --- io_uring/rsrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 7034be555334..f521c5965a93 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1258,7 +1258,7 @@ int io_import_fixed(int ddir, struct iov_iter *iter, */ const struct bio_vec *bvec = imu->bvec; - if (offset <= bvec->bv_len) { + if (offset < bvec->bv_len) { /* * Note, huge pages buffers consists of one large * bvec entry and should always go this way. The other From 88903daecacf03b1e5636e1b5f18bda5b07030fc Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 20 Nov 2023 18:51:06 -0500 Subject: [PATCH 099/234] eventfs: Remove expectation that ei->is_freed means ei->dentry == NULL The logic to free the eventfs_inode (ei) use to set is_freed and clear the "dentry" field under the eventfs_mutex. But that changed when a race was found where the ei->dentry needed to be cleared when the last dput() was called on it. But there was still logic that checked if ei->dentry was not NULL and is_freed is set, and would warn if it was. But since that situation was changed and the ei->dentry isn't cleared until the last dput() is called on it while the ei->is_freed is set, do not test for that condition anymore, and change the comments to reflect that. Link: https://lkml.kernel.org/r/20231120235154.265826243@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Fixes: 020010fbfa20 ("eventfs: Delete eventfs_inode when the last dentry is freed") Reported-by: Mark Rutland Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index f8a594a50ae6..f239b2b507a4 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -27,16 +27,16 @@ /* * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access * to the ei->dentry must be done under this mutex and after checking - * if ei->is_freed is not set. The ei->dentry is released under the - * mutex at the same time ei->is_freed is set. If ei->is_freed is set - * then the ei->dentry is invalid. + * if ei->is_freed is not set. When ei->is_freed is set, the dentry + * is on its way to being freed after the last dput() is made on it. */ static DEFINE_MUTEX(eventfs_mutex); /* * The eventfs_inode (ei) itself is protected by SRCU. It is released from * its parent's list and will have is_freed set (under eventfs_mutex). - * After the SRCU grace period is over, the ei may be freed. + * After the SRCU grace period is over and the last dput() is called + * the ei is freed. */ DEFINE_STATIC_SRCU(eventfs_srcu); @@ -365,12 +365,14 @@ create_file_dentry(struct eventfs_inode *ei, int idx, * created the dentry for this e_dentry. In which case * use that one. * - * Note, with the mutex held, the e_dentry cannot have content - * and the ei->is_freed be true at the same time. + * If ei->is_freed is set, the e_dentry is currently on its + * way to being freed, don't return it. If e_dentry is NULL + * it means it was already freed. */ - dentry = *e_dentry; - if (WARN_ON_ONCE(dentry && ei->is_freed)) + if (ei->is_freed) dentry = NULL; + else + dentry = *e_dentry; /* The lookup does not need to up the dentry refcount */ if (dentry && !lookup) dget(dentry); @@ -473,8 +475,8 @@ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, * created the dentry for this e_dentry. In which case * use that one. * - * Note, with the mutex held, the e_dentry cannot have content - * and the ei->is_freed be true at the same time. + * If ei->is_freed is set, the e_dentry is currently on its + * way to being freed. */ dentry = ei->dentry; if (dentry && !lookup) From 71cade82f2b553a74d046c015c986f2df165696f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 20 Nov 2023 18:51:07 -0500 Subject: [PATCH 100/234] eventfs: Do not invalidate dentry in create_file/dir_dentry() With the call to simple_recursive_removal() on the entire eventfs sub system when the directory is removed, it performs the d_invalidate on all the dentries when it is removed. There's no need to do clean ups when a dentry is being created while the directory is being deleted. As dentries are cleaned up by the simpler_recursive_removal(), trying to do d_invalidate() in these functions will cause the dentry to be invalidated twice, and crash the kernel. Link: https://lore.kernel.org/all/20231116123016.140576-1-naresh.kamboju@linaro.org/ Link: https://lkml.kernel.org/r/20231120235154.422970988@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Fixes: 407c6726ca71 ("eventfs: Use simple_recursive_removal() to clean up dentries") Reported-by: Mark Rutland Reported-by: Naresh Kamboju Reported-by: Linux Kernel Functional Testing Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index f239b2b507a4..3eb6c622a74d 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -326,7 +326,6 @@ create_file_dentry(struct eventfs_inode *ei, int idx, struct eventfs_attr *attr = NULL; struct dentry **e_dentry = &ei->d_children[idx]; struct dentry *dentry; - bool invalidate = false; mutex_lock(&eventfs_mutex); if (ei->is_freed) { @@ -389,17 +388,14 @@ create_file_dentry(struct eventfs_inode *ei, int idx, * Otherwise it means two dentries exist with the same name. */ WARN_ON_ONCE(!ei->is_freed); - invalidate = true; + dentry = NULL; } mutex_unlock(&eventfs_mutex); - if (invalidate) - d_invalidate(dentry); - - if (lookup || invalidate) + if (lookup) dput(dentry); - return invalidate ? NULL : dentry; + return dentry; } /** @@ -439,7 +435,6 @@ static struct dentry * create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, struct dentry *parent, bool lookup) { - bool invalidate = false; struct dentry *dentry = NULL; mutex_lock(&eventfs_mutex); @@ -495,16 +490,14 @@ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, * Otherwise it means two dentries exist with the same name. */ WARN_ON_ONCE(!ei->is_freed); - invalidate = true; + dentry = NULL; } mutex_unlock(&eventfs_mutex); - if (invalidate) - d_invalidate(dentry); - if (lookup || invalidate) + if (lookup) dput(dentry); - return invalidate ? NULL : dentry; + return dentry; } /** From 977bc146d4eb7070118d8a974919b33bb52732b4 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 21 Nov 2023 04:06:51 +0200 Subject: [PATCH 101/234] selftests/bpf: track tcp payload offset as scalar in xdp_synproxy This change prepares syncookie_{tc,xdp} for update in callbakcs verification logic. To allow bpf_loop() verification converge when multiple callback itreations are considered: - track offset inside TCP payload explicitly, not as a part of the pointer; - make sure that offset does not exceed MAX_PACKET_OFF enforced by verifier; - make sure that offset is tracked as unbound scalar between iterations, otherwise verifier won't be able infer that bpf_loop callback reaches identical states. Acked-by: Andrii Nakryiko Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20231121020701.26440-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/xdp_synproxy_kern.c | 84 ++++++++++++------- 1 file changed, 52 insertions(+), 32 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c index e959336c7a73..80f620602d50 100644 --- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c +++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c @@ -53,6 +53,8 @@ #define DEFAULT_TTL 64 #define MAX_ALLOWED_PORTS 8 +#define MAX_PACKET_OFF 0xffff + #define swap(a, b) \ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) @@ -183,63 +185,76 @@ static __always_inline __u32 tcp_clock_ms(void) } struct tcpopt_context { - __u8 *ptr; - __u8 *end; + void *data; void *data_end; __be32 *tsecr; __u8 wscale; bool option_timestamp; bool option_sack; + __u32 off; }; +static __always_inline u8 *next(struct tcpopt_context *ctx, __u32 sz) +{ + __u64 off = ctx->off; + __u8 *data; + + /* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */ + if (off > MAX_PACKET_OFF - sz) + return NULL; + + data = ctx->data + off; + barrier_var(data); + if (data + sz >= ctx->data_end) + return NULL; + + ctx->off += sz; + return data; +} + static int tscookie_tcpopt_parse(struct tcpopt_context *ctx) { - __u8 opcode, opsize; + __u8 *opcode, *opsize, *wscale, *tsecr; + __u32 off = ctx->off; - if (ctx->ptr >= ctx->end) - return 1; - if (ctx->ptr >= ctx->data_end) + opcode = next(ctx, 1); + if (!opcode) return 1; - opcode = ctx->ptr[0]; - - if (opcode == TCPOPT_EOL) + if (*opcode == TCPOPT_EOL) return 1; - if (opcode == TCPOPT_NOP) { - ++ctx->ptr; + if (*opcode == TCPOPT_NOP) return 0; - } - if (ctx->ptr + 1 >= ctx->end) - return 1; - if (ctx->ptr + 1 >= ctx->data_end) - return 1; - opsize = ctx->ptr[1]; - if (opsize < 2) + opsize = next(ctx, 1); + if (!opsize || *opsize < 2) return 1; - if (ctx->ptr + opsize > ctx->end) - return 1; - - switch (opcode) { + switch (*opcode) { case TCPOPT_WINDOW: - if (opsize == TCPOLEN_WINDOW && ctx->ptr + TCPOLEN_WINDOW <= ctx->data_end) - ctx->wscale = ctx->ptr[2] < TCP_MAX_WSCALE ? ctx->ptr[2] : TCP_MAX_WSCALE; + wscale = next(ctx, 1); + if (!wscale) + return 1; + if (*opsize == TCPOLEN_WINDOW) + ctx->wscale = *wscale < TCP_MAX_WSCALE ? *wscale : TCP_MAX_WSCALE; break; case TCPOPT_TIMESTAMP: - if (opsize == TCPOLEN_TIMESTAMP && ctx->ptr + TCPOLEN_TIMESTAMP <= ctx->data_end) { + tsecr = next(ctx, 4); + if (!tsecr) + return 1; + if (*opsize == TCPOLEN_TIMESTAMP) { ctx->option_timestamp = true; /* Client's tsval becomes our tsecr. */ - *ctx->tsecr = get_unaligned((__be32 *)(ctx->ptr + 2)); + *ctx->tsecr = get_unaligned((__be32 *)tsecr); } break; case TCPOPT_SACK_PERM: - if (opsize == TCPOLEN_SACK_PERM) + if (*opsize == TCPOLEN_SACK_PERM) ctx->option_sack = true; break; } - ctx->ptr += opsize; + ctx->off = off + *opsize; return 0; } @@ -256,16 +271,21 @@ static int tscookie_tcpopt_parse_batch(__u32 index, void *context) static __always_inline bool tscookie_init(struct tcphdr *tcp_header, __u16 tcp_len, __be32 *tsval, - __be32 *tsecr, void *data_end) + __be32 *tsecr, void *data, void *data_end) { struct tcpopt_context loop_ctx = { - .ptr = (__u8 *)(tcp_header + 1), - .end = (__u8 *)tcp_header + tcp_len, + .data = data, .data_end = data_end, .tsecr = tsecr, .wscale = TS_OPT_WSCALE_MASK, .option_timestamp = false, .option_sack = false, + /* Note: currently verifier would track .off as unbound scalar. + * In case if verifier would at some point get smarter and + * compute bounded value for this var, beware that it might + * hinder bpf_loop() convergence validation. + */ + .off = (__u8 *)(tcp_header + 1) - (__u8 *)data, }; u32 cookie; @@ -635,7 +655,7 @@ static __always_inline int syncookie_handle_syn(struct header_pointers *hdr, cookie = (__u32)value; if (tscookie_init((void *)hdr->tcp, hdr->tcp_len, - &tsopt_buf[0], &tsopt_buf[1], data_end)) + &tsopt_buf[0], &tsopt_buf[1], data, data_end)) tsopt = tsopt_buf; /* Check that there is enough space for a SYNACK. It also covers From 87eb0152bcc102ecbda866978f4e54db5a3be1ef Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 21 Nov 2023 04:06:52 +0200 Subject: [PATCH 102/234] selftests/bpf: track string payload offset as scalar in strobemeta This change prepares strobemeta for update in callbacks verification logic. To allow bpf_loop() verification converge when multiple callback iterations are considered: - track offset inside strobemeta_payload->payload directly as scalar value; - at each iteration make sure that remaining strobemeta_payload->payload capacity is sufficient for execution of read_{map,str}_var functions; - make sure that offset is tracked as unbound scalar between iterations, otherwise verifier won't be able infer that bpf_loop callback reaches identical states. Acked-by: Andrii Nakryiko Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20231121020701.26440-3-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/strobemeta.h | 78 ++++++++++++------- 1 file changed, 48 insertions(+), 30 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index e02cfd380746..40df2cc26eaf 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -24,9 +24,11 @@ struct task_struct {}; #define STACK_TABLE_EPOCH_SHIFT 20 #define STROBE_MAX_STR_LEN 1 #define STROBE_MAX_CFGS 32 +#define READ_MAP_VAR_PAYLOAD_CAP \ + ((1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN) #define STROBE_MAX_PAYLOAD \ (STROBE_MAX_STRS * STROBE_MAX_STR_LEN + \ - STROBE_MAX_MAPS * (1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN) + STROBE_MAX_MAPS * READ_MAP_VAR_PAYLOAD_CAP) struct strobe_value_header { /* @@ -355,7 +357,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base, struct strobe_value_generic *value, struct strobemeta_payload *data, - void *payload) + size_t off) { void *location; uint64_t len; @@ -366,7 +368,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, return 0; bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location); - len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr); + len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, value->ptr); /* * if bpf_probe_read_user_str returns error (<0), due to casting to * unsinged int, it will become big number, so next check is @@ -378,14 +380,14 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, return 0; data->str_lens[idx] = len; - return len; + return off + len; } -static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, - size_t idx, void *tls_base, - struct strobe_value_generic *value, - struct strobemeta_payload *data, - void *payload) +static __always_inline uint64_t read_map_var(struct strobemeta_cfg *cfg, + size_t idx, void *tls_base, + struct strobe_value_generic *value, + struct strobemeta_payload *data, + size_t off) { struct strobe_map_descr* descr = &data->map_descrs[idx]; struct strobe_map_raw map; @@ -397,11 +399,11 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, location = calc_location(&cfg->map_locs[idx], tls_base); if (!location) - return payload; + return off; bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location); if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr)) - return payload; + return off; descr->id = map.id; descr->cnt = map.cnt; @@ -410,10 +412,10 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, data->req_meta_valid = 1; } - len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag); + len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.tag); if (len <= STROBE_MAX_STR_LEN) { descr->tag_len = len; - payload += len; + off += len; } #ifdef NO_UNROLL @@ -426,22 +428,22 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, break; descr->key_lens[i] = 0; - len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, + len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.entries[i].key); if (len <= STROBE_MAX_STR_LEN) { descr->key_lens[i] = len; - payload += len; + off += len; } descr->val_lens[i] = 0; - len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, + len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.entries[i].val); if (len <= STROBE_MAX_STR_LEN) { descr->val_lens[i] = len; - payload += len; + off += len; } } - return payload; + return off; } #ifdef USE_BPF_LOOP @@ -455,14 +457,20 @@ struct read_var_ctx { struct strobemeta_payload *data; void *tls_base; struct strobemeta_cfg *cfg; - void *payload; + size_t payload_off; /* value gets mutated */ struct strobe_value_generic *value; enum read_type type; }; -static int read_var_callback(__u32 index, struct read_var_ctx *ctx) +static int read_var_callback(__u64 index, struct read_var_ctx *ctx) { + /* lose precision info for ctx->payload_off, verifier won't track + * double xor, barrier_var() is needed to force clang keep both xors. + */ + ctx->payload_off ^= index; + barrier_var(ctx->payload_off); + ctx->payload_off ^= index; switch (ctx->type) { case READ_INT_VAR: if (index >= STROBE_MAX_INTS) @@ -472,14 +480,18 @@ static int read_var_callback(__u32 index, struct read_var_ctx *ctx) case READ_MAP_VAR: if (index >= STROBE_MAX_MAPS) return 1; - ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base, - ctx->value, ctx->data, ctx->payload); + if (ctx->payload_off > sizeof(ctx->data->payload) - READ_MAP_VAR_PAYLOAD_CAP) + return 1; + ctx->payload_off = read_map_var(ctx->cfg, index, ctx->tls_base, + ctx->value, ctx->data, ctx->payload_off); break; case READ_STR_VAR: if (index >= STROBE_MAX_STRS) return 1; - ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base, - ctx->value, ctx->data, ctx->payload); + if (ctx->payload_off > sizeof(ctx->data->payload) - STROBE_MAX_STR_LEN) + return 1; + ctx->payload_off = read_str_var(ctx->cfg, index, ctx->tls_base, + ctx->value, ctx->data, ctx->payload_off); break; } return 0; @@ -501,7 +513,8 @@ static void *read_strobe_meta(struct task_struct *task, pid_t pid = bpf_get_current_pid_tgid() >> 32; struct strobe_value_generic value = {0}; struct strobemeta_cfg *cfg; - void *tls_base, *payload; + size_t payload_off; + void *tls_base; cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid); if (!cfg) @@ -509,7 +522,7 @@ static void *read_strobe_meta(struct task_struct *task, data->int_vals_set_mask = 0; data->req_meta_valid = 0; - payload = data->payload; + payload_off = 0; /* * we don't have struct task_struct definition, it should be: * tls_base = (void *)task->thread.fsbase; @@ -522,7 +535,7 @@ static void *read_strobe_meta(struct task_struct *task, .tls_base = tls_base, .value = &value, .data = data, - .payload = payload, + .payload_off = 0, }; int err; @@ -540,6 +553,11 @@ static void *read_strobe_meta(struct task_struct *task, err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0); if (err != STROBE_MAX_MAPS) return NULL; + + payload_off = ctx.payload_off; + /* this should not really happen, here only to satisfy verifer */ + if (payload_off > sizeof(data->payload)) + payload_off = sizeof(data->payload); #else #ifdef NO_UNROLL #pragma clang loop unroll(disable) @@ -555,7 +573,7 @@ static void *read_strobe_meta(struct task_struct *task, #pragma unroll #endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_STRS; ++i) { - payload += read_str_var(cfg, i, tls_base, &value, data, payload); + payload_off = read_str_var(cfg, i, tls_base, &value, data, payload_off); } #ifdef NO_UNROLL #pragma clang loop unroll(disable) @@ -563,7 +581,7 @@ static void *read_strobe_meta(struct task_struct *task, #pragma unroll #endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_MAPS; ++i) { - payload = read_map_var(cfg, i, tls_base, &value, data, payload); + payload_off = read_map_var(cfg, i, tls_base, &value, data, payload_off); } #endif /* USE_BPF_LOOP */ @@ -571,7 +589,7 @@ static void *read_strobe_meta(struct task_struct *task, * return pointer right after end of payload, so it's possible to * calculate exact amount of useful data that needs to be sent */ - return payload; + return &data->payload[payload_off]; } SEC("raw_tracepoint/kfree_skb") From f40bfd1679446b22d321e64a1fa98b7d07d2be08 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 21 Nov 2023 04:06:53 +0200 Subject: [PATCH 103/234] selftests/bpf: fix bpf_loop_bench for new callback verification scheme MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a preparatory change. A follow-up patch "bpf: verify callbacks as if they are called unknown number of times" changes logic for callbacks handling. While previously callbacks were verified as a single function call, new scheme takes into account that callbacks could be executed unknown number of times. This has dire implications for bpf_loop_bench: SEC("fentry/" SYS_PREFIX "sys_getpgid") int benchmark(void *ctx) { for (int i = 0; i < 1000; i++) { bpf_loop(nr_loops, empty_callback, NULL, 0); __sync_add_and_fetch(&hits, nr_loops); } return 0; } W/o callbacks change verifier sees it as a 1000 calls to empty_callback(). However, with callbacks change things become exponential: - i=0: state exploring empty_callback is scheduled with i=0 (a); - i=1: state exploring empty_callback is scheduled with i=1; ... - i=999: state exploring empty_callback is scheduled with i=999; - state (a) is popped from stack; - i=1: state exploring empty_callback is scheduled with i=1; ... Avoid this issue by rewriting outer loop as bpf_loop(). Unfortunately, this adds a function call to a loop at runtime, which negatively affects performance: throughput latency before: 149.919 ± 0.168 M ops/s, 6.670 ns/op after : 137.040 ± 0.187 M ops/s, 7.297 ns/op Acked-by: Andrii Nakryiko Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20231121020701.26440-4-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/bpf_loop_bench.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c index 4ce76eb064c4..d461746fd3c1 100644 --- a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c +++ b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c @@ -15,13 +15,16 @@ static int empty_callback(__u32 index, void *data) return 0; } +static int outer_loop(__u32 index, void *data) +{ + bpf_loop(nr_loops, empty_callback, NULL, 0); + __sync_add_and_fetch(&hits, nr_loops); + return 0; +} + SEC("fentry/" SYS_PREFIX "sys_getpgid") int benchmark(void *ctx) { - for (int i = 0; i < 1000; i++) { - bpf_loop(nr_loops, empty_callback, NULL, 0); - - __sync_add_and_fetch(&hits, nr_loops); - } + bpf_loop(1000, outer_loop, NULL, 0); return 0; } From 683b96f9606ab7308ffb23c46ab43cecdef8a241 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 21 Nov 2023 04:06:54 +0200 Subject: [PATCH 104/234] bpf: extract __check_reg_arg() utility function Split check_reg_arg() into two utility functions: - check_reg_arg() operating on registers from current verifier state; - __check_reg_arg() operating on a specific set of registers passed as a parameter; The __check_reg_arg() function would be used by a follow-up change for callbacks handling. Acked-by: Andrii Nakryiko Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20231121020701.26440-5-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6da370a047fe..e6e1bcfe00f5 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3439,13 +3439,11 @@ static void mark_insn_zext(struct bpf_verifier_env *env, reg->subreg_def = DEF_NOT_SUBREG; } -static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, - enum reg_arg_type t) +static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno, + enum reg_arg_type t) { - struct bpf_verifier_state *vstate = env->cur_state; - struct bpf_func_state *state = vstate->frame[vstate->curframe]; struct bpf_insn *insn = env->prog->insnsi + env->insn_idx; - struct bpf_reg_state *reg, *regs = state->regs; + struct bpf_reg_state *reg; bool rw64; if (regno >= MAX_BPF_REG) { @@ -3486,6 +3484,15 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, return 0; } +static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, + enum reg_arg_type t) +{ + struct bpf_verifier_state *vstate = env->cur_state; + struct bpf_func_state *state = vstate->frame[vstate->curframe]; + + return __check_reg_arg(env, state->regs, regno, t); +} + static void mark_jmp_point(struct bpf_verifier_env *env, int idx) { env->insn_aux_data[idx].jmp_point = true; @@ -9350,7 +9357,7 @@ static void clear_caller_saved_regs(struct bpf_verifier_env *env, /* after the call registers r0 - r5 were scratched */ for (i = 0; i < CALLER_SAVED_REGS; i++) { mark_reg_not_init(env, regs, caller_saved[i]); - check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); + __check_reg_arg(env, regs, caller_saved[i], DST_OP_NO_MARK); } } From 58124a98cb8eda69d248d7f1de954c8b2767c945 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 21 Nov 2023 04:06:55 +0200 Subject: [PATCH 105/234] bpf: extract setup_func_entry() utility function Move code for simulated stack frame creation to a separate utility function. This function would be used in the follow-up change for callbacks handling. Acked-by: Andrii Nakryiko Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20231121020701.26440-6-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 84 ++++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 36 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e6e1bcfe00f5..68ee4803d3a2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9370,11 +9370,10 @@ static int set_callee_state(struct bpf_verifier_env *env, struct bpf_func_state *caller, struct bpf_func_state *callee, int insn_idx); -static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, - int *insn_idx, int subprog, - set_callee_state_fn set_callee_state_cb) +static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int callsite, + set_callee_state_fn set_callee_state_cb, + struct bpf_verifier_state *state) { - struct bpf_verifier_state *state = env->cur_state; struct bpf_func_state *caller, *callee; int err; @@ -9384,13 +9383,53 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn return -E2BIG; } - caller = state->frame[state->curframe]; if (state->frame[state->curframe + 1]) { verbose(env, "verifier bug. Frame %d already allocated\n", state->curframe + 1); return -EFAULT; } + caller = state->frame[state->curframe]; + callee = kzalloc(sizeof(*callee), GFP_KERNEL); + if (!callee) + return -ENOMEM; + state->frame[state->curframe + 1] = callee; + + /* callee cannot access r0, r6 - r9 for reading and has to write + * into its own stack before reading from it. + * callee can read/write into caller's stack + */ + init_func_state(env, callee, + /* remember the callsite, it will be used by bpf_exit */ + callsite, + state->curframe + 1 /* frameno within this callchain */, + subprog /* subprog number within this prog */); + /* Transfer references to the callee */ + err = copy_reference_state(callee, caller); + err = err ?: set_callee_state_cb(env, caller, callee, callsite); + if (err) + goto err_out; + + /* only increment it after check_reg_arg() finished */ + state->curframe++; + + return 0; + +err_out: + free_func_state(callee); + state->frame[state->curframe + 1] = NULL; + return err; +} + +static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx, int subprog, + set_callee_state_fn set_callee_state_cb) +{ + struct bpf_verifier_state *state = env->cur_state; + struct bpf_func_state *caller, *callee; + int err; + + caller = state->frame[state->curframe]; err = btf_check_subprog_call(env, subprog, caller->regs); if (err == -EFAULT) return err; @@ -9460,35 +9499,12 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn return 0; } - callee = kzalloc(sizeof(*callee), GFP_KERNEL); - if (!callee) - return -ENOMEM; - state->frame[state->curframe + 1] = callee; - - /* callee cannot access r0, r6 - r9 for reading and has to write - * into its own stack before reading from it. - * callee can read/write into caller's stack - */ - init_func_state(env, callee, - /* remember the callsite, it will be used by bpf_exit */ - *insn_idx /* callsite */, - state->curframe + 1 /* frameno within this callchain */, - subprog /* subprog number within this prog */); - - /* Transfer references to the callee */ - err = copy_reference_state(callee, caller); + err = setup_func_entry(env, subprog, *insn_idx, set_callee_state_cb, state); if (err) - goto err_out; - - err = set_callee_state_cb(env, caller, callee, *insn_idx); - if (err) - goto err_out; + return err; clear_caller_saved_regs(env, caller->regs); - /* only increment it after check_reg_arg() finished */ - state->curframe++; - /* and go analyze first insn of the callee */ *insn_idx = env->subprog_info[subprog].start - 1; @@ -9496,14 +9512,10 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn verbose(env, "caller:\n"); print_verifier_state(env, caller, true); verbose(env, "callee:\n"); - print_verifier_state(env, callee, true); + print_verifier_state(env, state->frame[state->curframe], true); } - return 0; -err_out: - free_func_state(callee); - state->frame[state->curframe + 1] = NULL; - return err; + return 0; } int map_set_for_each_callback_args(struct bpf_verifier_env *env, From ab5cfac139ab8576fb54630d4cca23c3e690ee90 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 21 Nov 2023 04:06:56 +0200 Subject: [PATCH 106/234] bpf: verify callbacks as if they are called unknown number of times Prior to this patch callbacks were handled as regular function calls, execution of callback body was modeled exactly once. This patch updates callbacks handling logic as follows: - introduces a function push_callback_call() that schedules callback body verification in env->head stack; - updates prepare_func_exit() to reschedule callback body verification upon BPF_EXIT; - as calls to bpf_*_iter_next(), calls to callback invoking functions are marked as checkpoints; - is_state_visited() is updated to stop callback based iteration when some identical parent state is found. Paths with callback function invoked zero times are now verified first, which leads to necessity to modify some selftests: - the following negative tests required adding release/unlock/drop calls to avoid previously masked unrelated error reports: - cb_refs.c:underflow_prog - exceptions_fail.c:reject_rbtree_add_throw - exceptions_fail.c:reject_with_cp_reference - the following precision tracking selftests needed change in expected log trace: - verifier_subprog_precision.c:callback_result_precise (note: r0 precision is no longer propagated inside callback and I think this is a correct behavior) - verifier_subprog_precision.c:parent_callee_saved_reg_precise_with_callback - verifier_subprog_precision.c:parent_stack_slot_precise_with_callback Reported-by: Andrew Werner Closes: https://lore.kernel.org/bpf/CA+vRuzPChFNXmouzGG+wsy=6eMcfr1mFG0F3g7rbg-sedGKW3w@mail.gmail.com/ Acked-by: Andrii Nakryiko Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20231121020701.26440-7-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 5 + kernel/bpf/verifier.c | 274 +++++++++++------- tools/testing/selftests/bpf/progs/cb_refs.c | 1 + .../selftests/bpf/progs/exceptions_fail.c | 2 + .../bpf/progs/verifier_subprog_precision.c | 71 ++++- 5 files changed, 240 insertions(+), 113 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 24213a99cc79..dd326936dd6f 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -400,6 +400,7 @@ struct bpf_verifier_state { struct bpf_idx_pair *jmp_history; u32 jmp_history_cnt; u32 dfs_depth; + u32 callback_unroll_depth; }; #define bpf_get_spilled_reg(slot, frame, mask) \ @@ -511,6 +512,10 @@ struct bpf_insn_aux_data { * this instruction, regardless of any heuristics */ bool force_checkpoint; + /* true if instruction is a call to a helper function that + * accepts callback function as a parameter. + */ + bool calls_callback; }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 68ee4803d3a2..a60dfa56ebb3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -547,13 +547,12 @@ static bool is_dynptr_ref_function(enum bpf_func_id func_id) return func_id == BPF_FUNC_dynptr_data; } -static bool is_callback_calling_kfunc(u32 btf_id); +static bool is_sync_callback_calling_kfunc(u32 btf_id); static bool is_bpf_throw_kfunc(struct bpf_insn *insn); -static bool is_callback_calling_function(enum bpf_func_id func_id) +static bool is_sync_callback_calling_function(enum bpf_func_id func_id) { return func_id == BPF_FUNC_for_each_map_elem || - func_id == BPF_FUNC_timer_set_callback || func_id == BPF_FUNC_find_vma || func_id == BPF_FUNC_loop || func_id == BPF_FUNC_user_ringbuf_drain; @@ -564,6 +563,18 @@ static bool is_async_callback_calling_function(enum bpf_func_id func_id) return func_id == BPF_FUNC_timer_set_callback; } +static bool is_callback_calling_function(enum bpf_func_id func_id) +{ + return is_sync_callback_calling_function(func_id) || + is_async_callback_calling_function(func_id); +} + +static bool is_sync_callback_calling_insn(struct bpf_insn *insn) +{ + return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) || + (bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm)); +} + static bool is_storage_get_function(enum bpf_func_id func_id) { return func_id == BPF_FUNC_sk_storage_get || @@ -1808,6 +1819,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, dst_state->first_insn_idx = src->first_insn_idx; dst_state->last_insn_idx = src->last_insn_idx; dst_state->dfs_depth = src->dfs_depth; + dst_state->callback_unroll_depth = src->callback_unroll_depth; dst_state->used_as_loop_entry = src->used_as_loop_entry; for (i = 0; i <= src->curframe; i++) { dst = dst_state->frame[i]; @@ -3731,6 +3743,8 @@ static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask) } } +static bool calls_callback(struct bpf_verifier_env *env, int insn_idx); + /* For given verifier state backtrack_insn() is called from the last insn to * the first insn. Its purpose is to compute a bitmask of registers and * stack slots that needs precision in the parent verifier state. @@ -3906,16 +3920,13 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, return -EFAULT; return 0; } - } else if ((bpf_helper_call(insn) && - is_callback_calling_function(insn->imm) && - !is_async_callback_calling_function(insn->imm)) || - (bpf_pseudo_kfunc_call(insn) && is_callback_calling_kfunc(insn->imm))) { - /* callback-calling helper or kfunc call, which means - * we are exiting from subprog, but unlike the subprog - * call handling above, we shouldn't propagate - * precision of r1-r5 (if any requested), as they are - * not actually arguments passed directly to callback - * subprogs + } else if (is_sync_callback_calling_insn(insn) && idx != subseq_idx - 1) { + /* exit from callback subprog to callback-calling helper or + * kfunc call. Use idx/subseq_idx check to discern it from + * straight line code backtracking. + * Unlike the subprog call handling above, we shouldn't + * propagate precision of r1-r5 (if any requested), as they are + * not actually arguments passed directly to callback subprogs */ if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) { verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); @@ -3950,10 +3961,18 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, } else if (opcode == BPF_EXIT) { bool r0_precise; + /* Backtracking to a nested function call, 'idx' is a part of + * the inner frame 'subseq_idx' is a part of the outer frame. + * In case of a regular function call, instructions giving + * precision to registers R1-R5 should have been found already. + * In case of a callback, it is ok to have R1-R5 marked for + * backtracking, as these registers are set by the function + * invoking callback. + */ + if (subseq_idx >= 0 && calls_callback(env, subseq_idx)) + for (i = BPF_REG_1; i <= BPF_REG_5; i++) + bt_clear_reg(bt, i); if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) { - /* if backtracing was looking for registers R1-R5 - * they should have been found already. - */ verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); WARN_ONCE(1, "verifier backtracking bug"); return -EFAULT; @@ -9421,11 +9440,11 @@ err_out: return err; } -static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, - int *insn_idx, int subprog, - set_callee_state_fn set_callee_state_cb) +static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int insn_idx, int subprog, + set_callee_state_fn set_callee_state_cb) { - struct bpf_verifier_state *state = env->cur_state; + struct bpf_verifier_state *state = env->cur_state, *callback_state; struct bpf_func_state *caller, *callee; int err; @@ -9433,44 +9452,22 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn err = btf_check_subprog_call(env, subprog, caller->regs); if (err == -EFAULT) return err; - if (subprog_is_global(env, subprog)) { - if (err) { - verbose(env, "Caller passes invalid args into func#%d\n", - subprog); - return err; - } else { - if (env->log.level & BPF_LOG_LEVEL) - verbose(env, - "Func#%d is global and valid. Skipping.\n", - subprog); - clear_caller_saved_regs(env, caller->regs); - - /* All global functions return a 64-bit SCALAR_VALUE */ - mark_reg_unknown(env, caller->regs, BPF_REG_0); - caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; - - /* continue with next insn after call */ - return 0; - } - } /* set_callee_state is used for direct subprog calls, but we are * interested in validating only BPF helpers that can call subprogs as * callbacks */ - if (set_callee_state_cb != set_callee_state) { - env->subprog_info[subprog].is_cb = true; - if (bpf_pseudo_kfunc_call(insn) && - !is_callback_calling_kfunc(insn->imm)) { - verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n", - func_id_name(insn->imm), insn->imm); - return -EFAULT; - } else if (!bpf_pseudo_kfunc_call(insn) && - !is_callback_calling_function(insn->imm)) { /* helper */ - verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n", - func_id_name(insn->imm), insn->imm); - return -EFAULT; - } + env->subprog_info[subprog].is_cb = true; + if (bpf_pseudo_kfunc_call(insn) && + !is_sync_callback_calling_kfunc(insn->imm)) { + verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n", + func_id_name(insn->imm), insn->imm); + return -EFAULT; + } else if (!bpf_pseudo_kfunc_call(insn) && + !is_callback_calling_function(insn->imm)) { /* helper */ + verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n", + func_id_name(insn->imm), insn->imm); + return -EFAULT; } if (insn->code == (BPF_JMP | BPF_CALL) && @@ -9481,25 +9478,76 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn /* there is no real recursion here. timer callbacks are async */ env->subprog_info[subprog].is_async_cb = true; async_cb = push_async_cb(env, env->subprog_info[subprog].start, - *insn_idx, subprog); + insn_idx, subprog); if (!async_cb) return -EFAULT; callee = async_cb->frame[0]; callee->async_entry_cnt = caller->async_entry_cnt + 1; /* Convert bpf_timer_set_callback() args into timer callback args */ - err = set_callee_state_cb(env, caller, callee, *insn_idx); + err = set_callee_state_cb(env, caller, callee, insn_idx); if (err) return err; + return 0; + } + + /* for callback functions enqueue entry to callback and + * proceed with next instruction within current frame. + */ + callback_state = push_stack(env, env->subprog_info[subprog].start, insn_idx, false); + if (!callback_state) + return -ENOMEM; + + err = setup_func_entry(env, subprog, insn_idx, set_callee_state_cb, + callback_state); + if (err) + return err; + + callback_state->callback_unroll_depth++; + return 0; +} + +static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx) +{ + struct bpf_verifier_state *state = env->cur_state; + struct bpf_func_state *caller; + int err, subprog, target_insn; + + target_insn = *insn_idx + insn->imm + 1; + subprog = find_subprog(env, target_insn); + if (subprog < 0) { + verbose(env, "verifier bug. No program starts at insn %d\n", target_insn); + return -EFAULT; + } + + caller = state->frame[state->curframe]; + err = btf_check_subprog_call(env, subprog, caller->regs); + if (err == -EFAULT) + return err; + if (subprog_is_global(env, subprog)) { + if (err) { + verbose(env, "Caller passes invalid args into func#%d\n", subprog); + return err; + } + + if (env->log.level & BPF_LOG_LEVEL) + verbose(env, "Func#%d is global and valid. Skipping.\n", subprog); clear_caller_saved_regs(env, caller->regs); + + /* All global functions return a 64-bit SCALAR_VALUE */ mark_reg_unknown(env, caller->regs, BPF_REG_0); caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; + /* continue with next insn after call */ return 0; } - err = setup_func_entry(env, subprog, *insn_idx, set_callee_state_cb, state); + /* for regular function entry setup new frame and continue + * from that frame. + */ + err = setup_func_entry(env, subprog, *insn_idx, set_callee_state, state); if (err) return err; @@ -9559,22 +9607,6 @@ static int set_callee_state(struct bpf_verifier_env *env, return 0; } -static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, - int *insn_idx) -{ - int subprog, target_insn; - - target_insn = *insn_idx + insn->imm + 1; - subprog = find_subprog(env, target_insn); - if (subprog < 0) { - verbose(env, "verifier bug. No program starts at insn %d\n", - target_insn); - return -EFAULT; - } - - return __check_func_call(env, insn, insn_idx, subprog, set_callee_state); -} - static int set_map_elem_callback_state(struct bpf_verifier_env *env, struct bpf_func_state *caller, struct bpf_func_state *callee, @@ -9798,6 +9830,11 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) verbose_invalid_scalar(env, r0, &range, "callback return", "R0"); return -EINVAL; } + if (!calls_callback(env, callee->callsite)) { + verbose(env, "BUG: in callback at %d, callsite %d !calls_callback\n", + *insn_idx, callee->callsite); + return -EFAULT; + } } else { /* return to the caller whatever r0 had in the callee */ caller->regs[BPF_REG_0] = *r0; @@ -9815,7 +9852,15 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) return err; } - *insn_idx = callee->callsite + 1; + /* for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite, + * there function call logic would reschedule callback visit. If iteration + * converges is_state_visited() would prune that visit eventually. + */ + if (callee->in_callback_fn) + *insn_idx = callee->callsite; + else + *insn_idx = callee->callsite + 1; + if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "returning from callee:\n"); print_verifier_state(env, callee, true); @@ -10228,24 +10273,24 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn } break; case BPF_FUNC_for_each_map_elem: - err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, - set_map_elem_callback_state); + err = push_callback_call(env, insn, insn_idx, meta.subprogno, + set_map_elem_callback_state); break; case BPF_FUNC_timer_set_callback: - err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, - set_timer_callback_state); + err = push_callback_call(env, insn, insn_idx, meta.subprogno, + set_timer_callback_state); break; case BPF_FUNC_find_vma: - err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, - set_find_vma_callback_state); + err = push_callback_call(env, insn, insn_idx, meta.subprogno, + set_find_vma_callback_state); break; case BPF_FUNC_snprintf: err = check_bpf_snprintf_call(env, regs); break; case BPF_FUNC_loop: update_loop_inline_state(env, meta.subprogno); - err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, - set_loop_callback_state); + err = push_callback_call(env, insn, insn_idx, meta.subprogno, + set_loop_callback_state); break; case BPF_FUNC_dynptr_from_mem: if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) { @@ -10341,8 +10386,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn break; } case BPF_FUNC_user_ringbuf_drain: - err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, - set_user_ringbuf_callback_state); + err = push_callback_call(env, insn, insn_idx, meta.subprogno, + set_user_ringbuf_callback_state); break; } @@ -11230,7 +11275,7 @@ static bool is_bpf_graph_api_kfunc(u32 btf_id) btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]; } -static bool is_callback_calling_kfunc(u32 btf_id) +static bool is_sync_callback_calling_kfunc(u32 btf_id) { return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]; } @@ -11982,6 +12027,21 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return -EACCES; } + /* Check the arguments */ + err = check_kfunc_args(env, &meta, insn_idx); + if (err < 0) + return err; + + if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { + err = push_callback_call(env, insn, insn_idx, meta.subprogno, + set_rbtree_add_callback_state); + if (err) { + verbose(env, "kfunc %s#%d failed callback verification\n", + func_name, meta.func_id); + return err; + } + } + rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta); rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta); @@ -12017,10 +12077,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return -EINVAL; } - /* Check the arguments */ - err = check_kfunc_args(env, &meta, insn_idx); - if (err < 0) - return err; /* In case of release function, we get register number of refcounted * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now. */ @@ -12054,16 +12110,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, } } - if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { - err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, - set_rbtree_add_callback_state); - if (err) { - verbose(env, "kfunc %s#%d failed callback verification\n", - func_name, meta.func_id); - return err; - } - } - if (meta.func_id == special_kfunc_list[KF_bpf_throw]) { if (!bpf_jit_supports_exceptions()) { verbose(env, "JIT does not support calling kfunc %s#%d\n", @@ -15427,6 +15473,15 @@ static bool is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx) return env->insn_aux_data[insn_idx].force_checkpoint; } +static void mark_calls_callback(struct bpf_verifier_env *env, int idx) +{ + env->insn_aux_data[idx].calls_callback = true; +} + +static bool calls_callback(struct bpf_verifier_env *env, int insn_idx) +{ + return env->insn_aux_data[insn_idx].calls_callback; +} enum { DONE_EXPLORING = 0, @@ -15540,6 +15595,21 @@ static int visit_insn(int t, struct bpf_verifier_env *env) * async state will be pushed for further exploration. */ mark_prune_point(env, t); + /* For functions that invoke callbacks it is not known how many times + * callback would be called. Verifier models callback calling functions + * by repeatedly visiting callback bodies and returning to origin call + * instruction. + * In order to stop such iteration verifier needs to identify when a + * state identical some state from a previous iteration is reached. + * Check below forces creation of checkpoint before callback calling + * instruction to allow search for such identical states. + */ + if (is_sync_callback_calling_insn(insn)) { + mark_calls_callback(env, t); + mark_force_checkpoint(env, t); + mark_prune_point(env, t); + mark_jmp_point(env, t); + } if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { struct bpf_kfunc_call_arg_meta meta; @@ -17009,10 +17079,16 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) } goto skip_inf_loop_check; } + if (calls_callback(env, insn_idx)) { + if (states_equal(env, &sl->state, cur, true)) + goto hit; + goto skip_inf_loop_check; + } /* attempt to detect infinite loop to avoid unnecessary doomed work */ if (states_maybe_looping(&sl->state, cur) && states_equal(env, &sl->state, cur, false) && - !iter_active_depths_differ(&sl->state, cur)) { + !iter_active_depths_differ(&sl->state, cur) && + sl->state.callback_unroll_depth == cur->callback_unroll_depth) { verbose_linfo(env, insn_idx, "; "); verbose(env, "infinite loop detected at insn %d\n", insn_idx); verbose(env, "cur state:"); diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c index 76d661b20e87..56c764df8196 100644 --- a/tools/testing/selftests/bpf/progs/cb_refs.c +++ b/tools/testing/selftests/bpf/progs/cb_refs.c @@ -33,6 +33,7 @@ int underflow_prog(void *ctx) if (!p) return 0; bpf_for_each_map_elem(&array_map, cb1, &p, 0); + bpf_kfunc_call_test_release(p); return 0; } diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c index 4c39e920dac2..8c0ef2742208 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_fail.c +++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c @@ -171,6 +171,7 @@ int reject_with_rbtree_add_throw(void *ctx) return 0; bpf_spin_lock(&lock); bpf_rbtree_add(&rbtree, &f->node, rbless); + bpf_spin_unlock(&lock); return 0; } @@ -214,6 +215,7 @@ int reject_with_cb_reference(void *ctx) if (!f) return 0; bpf_loop(5, subprog_cb_ref, NULL, 0); + bpf_obj_drop(f); return 0; } diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index db6b3143338b..da803cffb5ef 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -119,15 +119,26 @@ __naked int global_subprog_result_precise(void) SEC("?raw_tp") __success __log_level(2) +/* First simulated path does not include callback body */ __msg("14: (0f) r1 += r6") -__msg("mark_precise: frame0: last_idx 14 first_idx 10") +__msg("mark_precise: frame0: last_idx 14 first_idx 9") __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7") __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4") __msg("mark_precise: frame0: regs=r6 stack= before 11: (25) if r6 > 0x3 goto pc+4") __msg("mark_precise: frame0: regs=r6 stack= before 10: (bf) r6 = r0") -__msg("mark_precise: frame0: parent state regs=r0 stack=:") -__msg("mark_precise: frame0: last_idx 18 first_idx 0") -__msg("mark_precise: frame0: regs=r0 stack= before 18: (95) exit") +__msg("mark_precise: frame0: regs=r0 stack= before 9: (85) call bpf_loop") +/* State entering callback body popped from states stack */ +__msg("from 9 to 17: frame1:") +__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb") +__msg("17: (b7) r0 = 0") +__msg("18: (95) exit") +__msg("returning from callee:") +__msg("to caller at 9:") +/* r4 (flags) is always precise for bpf_loop() */ +__msg("frame 0: propagating r4") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: regs=r4 stack= before 18: (95) exit") +__msg("from 18 to 9: safe") __naked int callback_result_precise(void) { asm volatile ( @@ -233,20 +244,36 @@ __naked int parent_callee_saved_reg_precise_global(void) SEC("?raw_tp") __success __log_level(2) +/* First simulated path does not include callback body */ __msg("12: (0f) r1 += r6") -__msg("mark_precise: frame0: last_idx 12 first_idx 10") +__msg("mark_precise: frame0: last_idx 12 first_idx 9") __msg("mark_precise: frame0: regs=r6 stack= before 11: (bf) r1 = r7") __msg("mark_precise: frame0: regs=r6 stack= before 10: (27) r6 *= 4") +__msg("mark_precise: frame0: regs=r6 stack= before 9: (85) call bpf_loop") __msg("mark_precise: frame0: parent state regs=r6 stack=:") -__msg("mark_precise: frame0: last_idx 16 first_idx 0") -__msg("mark_precise: frame0: regs=r6 stack= before 16: (95) exit") -__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0") -__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop#181") +__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9") __msg("mark_precise: frame0: regs=r6 stack= before 8: (b7) r4 = 0") __msg("mark_precise: frame0: regs=r6 stack= before 7: (b7) r3 = 0") __msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r2 = r8") __msg("mark_precise: frame0: regs=r6 stack= before 5: (b7) r1 = 1") __msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") +/* State entering callback body popped from states stack */ +__msg("from 9 to 15: frame1:") +__msg("15: frame1: R1=scalar() R2=0 R10=fp0 cb") +__msg("15: (b7) r0 = 0") +__msg("16: (95) exit") +__msg("returning from callee:") +__msg("to caller at 9:") +/* r4 (flags) is always precise for bpf_loop(), + * r6 was marked before backtracking to callback body. + */ +__msg("frame 0: propagating r4,r6") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: regs=r4,r6 stack= before 16: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0") +__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop") +__msg("mark_precise: frame0: parent state regs= stack=:") +__msg("from 16 to 9: safe") __naked int parent_callee_saved_reg_precise_with_callback(void) { asm volatile ( @@ -373,22 +400,38 @@ __naked int parent_stack_slot_precise_global(void) SEC("?raw_tp") __success __log_level(2) +/* First simulated path does not include callback body */ __msg("14: (0f) r1 += r6") -__msg("mark_precise: frame0: last_idx 14 first_idx 11") +__msg("mark_precise: frame0: last_idx 14 first_idx 10") __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7") __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4") __msg("mark_precise: frame0: regs=r6 stack= before 11: (79) r6 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: regs= stack=-8 before 10: (85) call bpf_loop") __msg("mark_precise: frame0: parent state regs= stack=-8:") -__msg("mark_precise: frame0: last_idx 18 first_idx 0") -__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit") -__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0") -__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181") +__msg("mark_precise: frame0: last_idx 9 first_idx 0 subseq_idx 10") __msg("mark_precise: frame0: regs= stack=-8 before 9: (b7) r4 = 0") __msg("mark_precise: frame0: regs= stack=-8 before 8: (b7) r3 = 0") __msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r2 = r8") __msg("mark_precise: frame0: regs= stack=-8 before 6: (bf) r1 = r6") __msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -8) = r6") __msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") +/* State entering callback body popped from states stack */ +__msg("from 10 to 17: frame1:") +__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb") +__msg("17: (b7) r0 = 0") +__msg("18: (95) exit") +__msg("returning from callee:") +__msg("to caller at 10:") +/* r4 (flags) is always precise for bpf_loop(), + * fp-8 was marked before backtracking to callback body. + */ +__msg("frame 0: propagating r4,fp-8") +__msg("mark_precise: frame0: last_idx 10 first_idx 10 subseq_idx -1") +__msg("mark_precise: frame0: regs=r4 stack=-8 before 18: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0") +__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181") +__msg("mark_precise: frame0: parent state regs= stack=:") +__msg("from 18 to 10: safe") __naked int parent_stack_slot_precise_with_callback(void) { asm volatile ( From 958465e217dbf5fc6677d42d8827fb3073d86afd Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 21 Nov 2023 04:06:57 +0200 Subject: [PATCH 107/234] selftests/bpf: tests for iterating callbacks A set of test cases to check behavior of callback handling logic, check if verifier catches the following situations: - program not safe on second callback iteration; - program not safe on zero callback iterations; - infinite loop inside a callback. Verify that callback logic works for bpf_loop, bpf_for_each_map_elem, bpf_user_ringbuf_drain, bpf_find_vma. Acked-by: Andrii Nakryiko Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20231121020701.26440-8-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/verifier.c | 2 + .../bpf/progs/verifier_iterating_callbacks.c | 147 ++++++++++++++++++ 2 files changed, 149 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index e5c61aa6604a..5cfa7a6316b6 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -31,6 +31,7 @@ #include "verifier_helper_restricted.skel.h" #include "verifier_helper_value_access.skel.h" #include "verifier_int_ptr.skel.h" +#include "verifier_iterating_callbacks.skel.h" #include "verifier_jeq_infer_not_null.skel.h" #include "verifier_ld_ind.skel.h" #include "verifier_ldsx.skel.h" @@ -139,6 +140,7 @@ void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_acces void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); } void test_verifier_helper_value_access(void) { RUN(verifier_helper_value_access); } void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); } +void test_verifier_iterating_callbacks(void) { RUN(verifier_iterating_callbacks); } void test_verifier_jeq_infer_not_null(void) { RUN(verifier_jeq_infer_not_null); } void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); } void test_verifier_ldsx(void) { RUN(verifier_ldsx); } diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c new file mode 100644 index 000000000000..fa9429f77a81 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 8); + __type(key, __u32); + __type(value, __u64); +} map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_USER_RINGBUF); + __uint(max_entries, 8); +} ringbuf SEC(".maps"); + +struct vm_area_struct; +struct bpf_map; + +struct buf_context { + char *buf; +}; + +struct num_context { + __u64 i; +}; + +__u8 choice_arr[2] = { 0, 1 }; + +static int unsafe_on_2nd_iter_cb(__u32 idx, struct buf_context *ctx) +{ + if (idx == 0) { + ctx->buf = (char *)(0xDEAD); + return 0; + } + + if (bpf_probe_read_user(ctx->buf, 8, (void *)(0xBADC0FFEE))) + return 1; + + return 0; +} + +SEC("?raw_tp") +__failure __msg("R1 type=scalar expected=fp") +int unsafe_on_2nd_iter(void *unused) +{ + char buf[4]; + struct buf_context loop_ctx = { .buf = buf }; + + bpf_loop(100, unsafe_on_2nd_iter_cb, &loop_ctx, 0); + return 0; +} + +static int unsafe_on_zero_iter_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i = 0; + return 0; +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=32 size=1") +int unsafe_on_zero_iter(void *unused) +{ + struct num_context loop_ctx = { .i = 32 }; + + bpf_loop(100, unsafe_on_zero_iter_cb, &loop_ctx, 0); + return choice_arr[loop_ctx.i]; +} + +static int loop_detection_cb(__u32 idx, struct num_context *ctx) +{ + for (;;) {} + return 0; +} + +SEC("?raw_tp") +__failure __msg("infinite loop detected") +int loop_detection(void *unused) +{ + struct num_context loop_ctx = { .i = 0 }; + + bpf_loop(100, loop_detection_cb, &loop_ctx, 0); + return 0; +} + +static __always_inline __u64 oob_state_machine(struct num_context *ctx) +{ + switch (ctx->i) { + case 0: + ctx->i = 1; + break; + case 1: + ctx->i = 32; + break; + } + return 0; +} + +static __u64 for_each_map_elem_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data) +{ + return oob_state_machine(data); +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=32 size=1") +int unsafe_for_each_map_elem(void *unused) +{ + struct num_context loop_ctx = { .i = 0 }; + + bpf_for_each_map_elem(&map, for_each_map_elem_cb, &loop_ctx, 0); + return choice_arr[loop_ctx.i]; +} + +static __u64 ringbuf_drain_cb(struct bpf_dynptr *dynptr, void *data) +{ + return oob_state_machine(data); +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=32 size=1") +int unsafe_ringbuf_drain(void *unused) +{ + struct num_context loop_ctx = { .i = 0 }; + + bpf_user_ringbuf_drain(&ringbuf, ringbuf_drain_cb, &loop_ctx, 0); + return choice_arr[loop_ctx.i]; +} + +static __u64 find_vma_cb(struct task_struct *task, struct vm_area_struct *vma, void *data) +{ + return oob_state_machine(data); +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=32 size=1") +int unsafe_find_vma(void *unused) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct num_context loop_ctx = { .i = 0 }; + + bpf_find_vma(task, 0, find_vma_cb, &loop_ctx, 0); + return choice_arr[loop_ctx.i]; +} + +char _license[] SEC("license") = "GPL"; From cafe2c21508a38cdb3ed22708842e957b2572c3e Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 21 Nov 2023 04:06:58 +0200 Subject: [PATCH 108/234] bpf: widening for callback iterators Callbacks are similar to open coded iterators, so add imprecise widening logic for callback body processing. This makes callback based loops behave identically to open coded iterators, e.g. allowing to verify programs like below: struct ctx { u32 i; }; int cb(u32 idx, struct ctx* ctx) { ++ctx->i; return 0; } ... struct ctx ctx = { .i = 0 }; bpf_loop(100, cb, &ctx, 0); ... Acked-by: Andrii Nakryiko Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20231121020701.26440-9-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a60dfa56ebb3..2f03e6b11bb9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9799,9 +9799,10 @@ static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env) static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) { - struct bpf_verifier_state *state = env->cur_state; + struct bpf_verifier_state *state = env->cur_state, *prev_st; struct bpf_func_state *caller, *callee; struct bpf_reg_state *r0; + bool in_callback_fn; int err; callee = state->frame[state->curframe]; @@ -9856,7 +9857,8 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) * there function call logic would reschedule callback visit. If iteration * converges is_state_visited() would prune that visit eventually. */ - if (callee->in_callback_fn) + in_callback_fn = callee->in_callback_fn; + if (in_callback_fn) *insn_idx = callee->callsite; else *insn_idx = callee->callsite + 1; @@ -9871,6 +9873,24 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) * bpf_throw, this will be done by copy_verifier_state for extra frames. */ free_func_state(callee); state->frame[state->curframe--] = NULL; + + /* for callbacks widen imprecise scalars to make programs like below verify: + * + * struct ctx { int i; } + * void cb(int idx, struct ctx *ctx) { ctx->i++; ... } + * ... + * struct ctx = { .i = 0; } + * bpf_loop(100, cb, &ctx, 0); + * + * This is similar to what is done in process_iter_next_call() for open + * coded iterators. + */ + prev_st = in_callback_fn ? find_prev_entry(env, state, *insn_idx) : NULL; + if (prev_st) { + err = widen_imprecise_scalars(env, prev_st, state); + if (err) + return err; + } return 0; } From 9f3330aa644d6d979eb064c46e85c62d4b4eac75 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 21 Nov 2023 04:06:59 +0200 Subject: [PATCH 109/234] selftests/bpf: test widening for iterating callbacks A test case to verify that imprecise scalars widening is applied to callback entering state, when callback call is simulated repeatedly. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20231121020701.26440-10-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../bpf/progs/verifier_iterating_callbacks.c | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c index fa9429f77a81..598c1e984b26 100644 --- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c +++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c @@ -25,6 +25,7 @@ struct buf_context { struct num_context { __u64 i; + __u64 j; }; __u8 choice_arr[2] = { 0, 1 }; @@ -69,6 +70,25 @@ int unsafe_on_zero_iter(void *unused) return choice_arr[loop_ctx.i]; } +static int widening_cb(__u32 idx, struct num_context *ctx) +{ + ++ctx->i; + return 0; +} + +SEC("?raw_tp") +__success +int widening(void *unused) +{ + struct num_context loop_ctx = { .i = 0, .j = 1 }; + + bpf_loop(100, widening_cb, &loop_ctx, 0); + /* loop_ctx.j is not changed during callback iteration, + * verifier should not apply widening to it. + */ + return choice_arr[loop_ctx.j]; +} + static int loop_detection_cb(__u32 idx, struct num_context *ctx) { for (;;) {} From bb124da69c47dd98d69361ec13244ece50bec63e Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 21 Nov 2023 04:07:00 +0200 Subject: [PATCH 110/234] bpf: keep track of max number of bpf_loop callback iterations In some cases verifier can't infer convergence of the bpf_loop() iteration. E.g. for the following program: static int cb(__u32 idx, struct num_context* ctx) { ctx->i++; return 0; } SEC("?raw_tp") int prog(void *_) { struct num_context ctx = { .i = 0 }; __u8 choice_arr[2] = { 0, 1 }; bpf_loop(2, cb, &ctx, 0); return choice_arr[ctx.i]; } Each 'cb' simulation would eventually return to 'prog' and reach 'return choice_arr[ctx.i]' statement. At which point ctx.i would be marked precise, thus forcing verifier to track multitude of separate states with {.i=0}, {.i=1}, ... at bpf_loop() callback entry. This commit allows "brute force" handling for such cases by limiting number of callback body simulations using 'umax' value of the first bpf_loop() parameter. For this, extend bpf_func_state with 'callback_depth' field. Increment this field when callback visiting state is pushed to states traversal stack. For frame #N it's 'callback_depth' field counts how many times callback with frame depth N+1 had been executed. Use bpf_func_state specifically to allow independent tracking of callback depths when multiple nested bpf_loop() calls are present. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20231121020701.26440-11-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 11 ++++++ kernel/bpf/verifier.c | 19 ++++++++-- .../bpf/progs/verifier_subprog_precision.c | 35 +++++++++++++------ 3 files changed, 53 insertions(+), 12 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index dd326936dd6f..aa4d19d0bc94 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -301,6 +301,17 @@ struct bpf_func_state { struct tnum callback_ret_range; bool in_async_callback_fn; bool in_exception_callback_fn; + /* For callback calling functions that limit number of possible + * callback executions (e.g. bpf_loop) keeps track of current + * simulated iteration number. + * Value in frame N refers to number of times callback with frame + * N+1 was simulated, e.g. for the following call: + * + * bpf_loop(..., fn, ...); | suppose current frame is N + * | fn would be simulated in frame N+1 + * | number of simulations is tracked in frame N + */ + u32 callback_depth; /* The following fields should be last. See copy_func_state() */ int acquired_refs; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2f03e6b11bb9..af2819d5c8ee 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9505,6 +9505,8 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins return err; callback_state->callback_unroll_depth++; + callback_state->frame[callback_state->curframe - 1]->callback_depth++; + caller->callback_depth = 0; return 0; } @@ -10309,8 +10311,21 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn break; case BPF_FUNC_loop: update_loop_inline_state(env, meta.subprogno); - err = push_callback_call(env, insn, insn_idx, meta.subprogno, - set_loop_callback_state); + /* Verifier relies on R1 value to determine if bpf_loop() iteration + * is finished, thus mark it precise. + */ + err = mark_chain_precision(env, BPF_REG_1); + if (err) + return err; + if (cur_func(env)->callback_depth < regs[BPF_REG_1].umax_value) { + err = push_callback_call(env, insn, insn_idx, meta.subprogno, + set_loop_callback_state); + } else { + cur_func(env)->callback_depth = 0; + if (env->log.level & BPF_LOG_LEVEL2) + verbose(env, "frame%d bpf_loop iteration limit reached\n", + env->cur_state->curframe); + } break; case BPF_FUNC_dynptr_from_mem: if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) { diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index da803cffb5ef..f61d623b1ce8 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -119,7 +119,23 @@ __naked int global_subprog_result_precise(void) SEC("?raw_tp") __success __log_level(2) -/* First simulated path does not include callback body */ +/* First simulated path does not include callback body, + * r1 and r4 are always precise for bpf_loop() calls. + */ +__msg("9: (85) call bpf_loop#181") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: parent state regs=r4 stack=:") +__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9") +__msg("mark_precise: frame0: regs=r4 stack= before 8: (b7) r4 = 0") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: parent state regs=r1 stack=:") +__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9") +__msg("mark_precise: frame0: regs=r1 stack= before 8: (b7) r4 = 0") +__msg("mark_precise: frame0: regs=r1 stack= before 7: (b7) r3 = 0") +__msg("mark_precise: frame0: regs=r1 stack= before 6: (bf) r2 = r8") +__msg("mark_precise: frame0: regs=r1 stack= before 5: (bf) r1 = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") +/* r6 precision propagation */ __msg("14: (0f) r1 += r6") __msg("mark_precise: frame0: last_idx 14 first_idx 9") __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7") @@ -134,10 +150,9 @@ __msg("17: (b7) r0 = 0") __msg("18: (95) exit") __msg("returning from callee:") __msg("to caller at 9:") -/* r4 (flags) is always precise for bpf_loop() */ -__msg("frame 0: propagating r4") +__msg("frame 0: propagating r1,r4") __msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") -__msg("mark_precise: frame0: regs=r4 stack= before 18: (95) exit") +__msg("mark_precise: frame0: regs=r1,r4 stack= before 18: (95) exit") __msg("from 18 to 9: safe") __naked int callback_result_precise(void) { @@ -264,12 +279,12 @@ __msg("15: (b7) r0 = 0") __msg("16: (95) exit") __msg("returning from callee:") __msg("to caller at 9:") -/* r4 (flags) is always precise for bpf_loop(), +/* r1, r4 are always precise for bpf_loop(), * r6 was marked before backtracking to callback body. */ -__msg("frame 0: propagating r4,r6") +__msg("frame 0: propagating r1,r4,r6") __msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") -__msg("mark_precise: frame0: regs=r4,r6 stack= before 16: (95) exit") +__msg("mark_precise: frame0: regs=r1,r4,r6 stack= before 16: (95) exit") __msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0") __msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop") __msg("mark_precise: frame0: parent state regs= stack=:") @@ -422,12 +437,12 @@ __msg("17: (b7) r0 = 0") __msg("18: (95) exit") __msg("returning from callee:") __msg("to caller at 10:") -/* r4 (flags) is always precise for bpf_loop(), +/* r1, r4 are always precise for bpf_loop(), * fp-8 was marked before backtracking to callback body. */ -__msg("frame 0: propagating r4,fp-8") +__msg("frame 0: propagating r1,r4,fp-8") __msg("mark_precise: frame0: last_idx 10 first_idx 10 subseq_idx -1") -__msg("mark_precise: frame0: regs=r4 stack=-8 before 18: (95) exit") +__msg("mark_precise: frame0: regs=r1,r4 stack=-8 before 18: (95) exit") __msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0") __msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181") __msg("mark_precise: frame0: parent state regs= stack=:") From 57e2a52deeb12ab84c15c6d0fb93638b5b94001b Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 21 Nov 2023 04:07:01 +0200 Subject: [PATCH 111/234] selftests/bpf: check if max number of bpf_loop iterations is tracked Check that even if bpf_loop() callback simulation does not converge to a specific state, verification could proceed via "brute force" simulation of maximal number of callback calls. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20231121020701.26440-12-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../bpf/progs/verifier_iterating_callbacks.c | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c index 598c1e984b26..5905e036e0ea 100644 --- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c +++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c @@ -164,4 +164,79 @@ int unsafe_find_vma(void *unused) return choice_arr[loop_ctx.i]; } +static int iter_limit_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i++; + return 0; +} + +SEC("?raw_tp") +__success +int bpf_loop_iter_limit_ok(void *unused) +{ + struct num_context ctx = { .i = 0 }; + + bpf_loop(1, iter_limit_cb, &ctx, 0); + return choice_arr[ctx.i]; +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=2 size=1") +int bpf_loop_iter_limit_overflow(void *unused) +{ + struct num_context ctx = { .i = 0 }; + + bpf_loop(2, iter_limit_cb, &ctx, 0); + return choice_arr[ctx.i]; +} + +static int iter_limit_level2a_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i += 100; + return 0; +} + +static int iter_limit_level2b_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i += 10; + return 0; +} + +static int iter_limit_level1_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i += 1; + bpf_loop(1, iter_limit_level2a_cb, ctx, 0); + bpf_loop(1, iter_limit_level2b_cb, ctx, 0); + return 0; +} + +/* Check that path visiting every callback function once had been + * reached by verifier. Variables 'ctx{1,2}i' below serve as flags, + * with each decimal digit corresponding to a callback visit marker. + */ +SEC("socket") +__success __retval(111111) +int bpf_loop_iter_limit_nested(void *unused) +{ + struct num_context ctx1 = { .i = 0 }; + struct num_context ctx2 = { .i = 0 }; + __u64 a, b, c; + + bpf_loop(1, iter_limit_level1_cb, &ctx1, 0); + bpf_loop(1, iter_limit_level1_cb, &ctx2, 0); + a = ctx1.i; + b = ctx2.i; + /* Force 'ctx1.i' and 'ctx2.i' precise. */ + c = choice_arr[(a + b) % 2]; + /* This makes 'c' zero, but neither clang nor verifier know it. */ + c /= 10; + /* Make sure that verifier does not visit 'impossible' states: + * enumerate all possible callback visit masks. + */ + if (a != 0 && a != 1 && a != 11 && a != 101 && a != 111 && + b != 0 && b != 1 && b != 11 && b != 101 && b != 111) + asm volatile ("r0 /= 0;" ::: "r0"); + return 1000 * a + b + c; +} + char _license[] SEC("license") = "GPL"; From d3ec75bc635cb0cb8185b63293d33a3d1b942d22 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 21 Nov 2023 15:03:25 +0800 Subject: [PATCH 112/234] LoongArch: Add dependency between vmlinuz.efi and vmlinux.efi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A common issue in Makefile is a race in parallel building. You need to be careful to prevent multiple threads from writing to the same file simultaneously. Commit 3939f3345050 ("ARM: 8418/1: add boot image dependencies to not generate invalid images") addressed such a bad scenario. A similar symptom occurs with the following command: $ make -j$(nproc) ARCH=loongarch vmlinux.efi vmlinuz.efi [ snip ] SORTTAB vmlinux OBJCOPY arch/loongarch/boot/vmlinux.efi OBJCOPY arch/loongarch/boot/vmlinux.efi PAD arch/loongarch/boot/vmlinux.bin GZIP arch/loongarch/boot/vmlinuz OBJCOPY arch/loongarch/boot/vmlinuz.o LD arch/loongarch/boot/vmlinuz.efi.elf OBJCOPY arch/loongarch/boot/vmlinuz.efi The log "OBJCOPY arch/loongarch/boot/vmlinux.efi" is displayed twice. It indicates that two threads simultaneously enter arch/loongarch/boot/ and write to arch/loongarch/boot/vmlinux.efi. It occasionally leads to a build failure: $ make -j$(nproc) ARCH=loongarch vmlinux.efi vmlinuz.efi [ snip ] SORTTAB vmlinux OBJCOPY arch/loongarch/boot/vmlinux.efi PAD arch/loongarch/boot/vmlinux.bin truncate: Invalid number: ‘arch/loongarch/boot/vmlinux.bin’ make[2]: *** [drivers/firmware/efi/libstub/Makefile.zboot:13: arch/loongarch/boot/vmlinux.bin] Error 1 make[2]: *** Deleting file 'arch/loongarch/boot/vmlinux.bin' make[1]: *** [arch/loongarch/Makefile:146: vmlinuz.efi] Error 2 make[1]: *** Waiting for unfinished jobs.... make: *** [Makefile:234: __sub-make] Error 2 vmlinuz.efi depends on vmlinux.efi, but such a dependency is not specified in arch/loongarch/Makefile. Signed-off-by: Masahiro Yamada Signed-off-by: Huacai Chen --- arch/loongarch/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 9eeb0c05f3f4..a8d65eaf1211 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -142,6 +142,8 @@ vdso-install-y += arch/loongarch/vdso/vdso.so.dbg all: $(notdir $(KBUILD_IMAGE)) +vmlinuz.efi: vmlinux.efi + vmlinux.elf vmlinux.efi vmlinuz.efi: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(bootvars-y) $(boot)/$@ From cbfd44bd5c6eec0aada0c39130f0b8d7ecba0529 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Tue, 21 Nov 2023 15:03:25 +0800 Subject: [PATCH 113/234] LoongArch: Explicitly set -fdirect-access-external-data for vmlinux After this llvm commit [1], The -fno-pic does not imply direct access external data. Explicitly set -fdirect-access-external-data for vmlinux that can avoids GOT entries. Link: https://github.com/llvm/llvm-project/commit/47eeee297775347cbdb7624d6a766c2a3eec4a59 Suggested-by: Xi Ruoyao Signed-off-by: WANG Rui Signed-off-by: Huacai Chen --- arch/loongarch/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index a8d65eaf1211..204b94b2e6aa 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -68,6 +68,7 @@ LDFLAGS_vmlinux += -static -n -nostdlib ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS cflags-y += $(call cc-option,-mexplicit-relocs) KBUILD_CFLAGS_KERNEL += $(call cc-option,-mdirect-extern-access) +KBUILD_CFLAGS_KERNEL += $(call cc-option,-fdirect-access-external-data) KBUILD_AFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data) KBUILD_CFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data) KBUILD_AFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax) From aa0cbc1b506b090c3a775b547c693ada108cc0d7 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Tue, 21 Nov 2023 15:03:25 +0800 Subject: [PATCH 114/234] LoongArch: Record pc instead of offset in la_abs relocation To clarify, the previous version functioned flawlessly. However, it's worth noting that the LLVM's LoongArch backend currently lacks support for cross-section label calculations. With this patch, we enable the use of clang to compile relocatable kernels. Tested-by: Nathan Chancellor Signed-off-by: WANG Rui Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/asmmacro.h | 3 +-- arch/loongarch/include/asm/setup.h | 2 +- arch/loongarch/kernel/relocate.c | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h index c9544f358c33..655db7d7a427 100644 --- a/arch/loongarch/include/asm/asmmacro.h +++ b/arch/loongarch/include/asm/asmmacro.h @@ -609,8 +609,7 @@ lu32i.d \reg, 0 lu52i.d \reg, \reg, 0 .pushsection ".la_abs", "aw", %progbits - 768: - .dword 768b-766b + .dword 766b .dword \sym .popsection #endif diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h index a0bc159ce8bd..ee52fb1e9963 100644 --- a/arch/loongarch/include/asm/setup.h +++ b/arch/loongarch/include/asm/setup.h @@ -25,7 +25,7 @@ extern void set_merr_handler(unsigned long offset, void *addr, unsigned long len #ifdef CONFIG_RELOCATABLE struct rela_la_abs { - long offset; + long pc; long symvalue; }; diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c index 6c3eff9af9fb..288b739ca88d 100644 --- a/arch/loongarch/kernel/relocate.c +++ b/arch/loongarch/kernel/relocate.c @@ -52,7 +52,7 @@ static inline void __init relocate_absolute(long random_offset) for (p = begin; (void *)p < end; p++) { long v = p->symvalue; uint32_t lu12iw, ori, lu32id, lu52id; - union loongarch_instruction *insn = (void *)p - p->offset; + union loongarch_instruction *insn = (void *)p->pc; lu12iw = (v >> 12) & 0xfffff; ori = v & 0xfff; From ee2daf7102f42007b7bf5dbd1ae76478ee62c512 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 21 Nov 2023 15:03:25 +0800 Subject: [PATCH 115/234] LoongArch: Add __percpu annotation for __percpu_read()/__percpu_write() When build kernel with C=1, we get: arch/loongarch/kernel/process.c:234:46: warning: incorrect type in argument 1 (different address spaces) arch/loongarch/kernel/process.c:234:46: expected void *ptr arch/loongarch/kernel/process.c:234:46: got unsigned long [noderef] __percpu * arch/loongarch/kernel/process.c:234:46: warning: incorrect type in argument 1 (different address spaces) arch/loongarch/kernel/process.c:234:46: expected void *ptr arch/loongarch/kernel/process.c:234:46: got unsigned long [noderef] __percpu * arch/loongarch/kernel/process.c:234:46: warning: incorrect type in argument 1 (different address spaces) arch/loongarch/kernel/process.c:234:46: expected void *ptr arch/loongarch/kernel/process.c:234:46: got unsigned long [noderef] __percpu * arch/loongarch/kernel/process.c:234:46: warning: incorrect type in argument 1 (different address spaces) arch/loongarch/kernel/process.c:234:46: expected void *ptr arch/loongarch/kernel/process.c:234:46: got unsigned long [noderef] __percpu * Add __percpu annotation for __percpu_read()/__percpu_write() can avoid such warnings. __percpu_xchg() and other functions don't need annotation because their wrapper, i.e. _pcp_protect(), already suppresses warnings. Also adjust the indentations in this file. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311080409.LlOfTR3m-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202311080840.Vc2kXhfp-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202311081340.3k72KKdg-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202311120926.cjYHyoYw-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202311152142.g6UyNx1R-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202311160339.DbhaH8LX-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202311181454.CTPrSYmQ-lkp@intel.com/ Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/percpu.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index ed5da02b1cf6..9b36ac003f89 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -40,13 +40,13 @@ static __always_inline unsigned long __percpu_##op(void *ptr, \ switch (size) { \ case 4: \ __asm__ __volatile__( \ - "am"#asm_op".w" " %[ret], %[val], %[ptr] \n" \ + "am"#asm_op".w" " %[ret], %[val], %[ptr] \n" \ : [ret] "=&r" (ret), [ptr] "+ZB"(*(u32 *)ptr) \ : [val] "r" (val)); \ break; \ case 8: \ __asm__ __volatile__( \ - "am"#asm_op".d" " %[ret], %[val], %[ptr] \n" \ + "am"#asm_op".d" " %[ret], %[val], %[ptr] \n" \ : [ret] "=&r" (ret), [ptr] "+ZB"(*(u64 *)ptr) \ : [val] "r" (val)); \ break; \ @@ -63,7 +63,7 @@ PERCPU_OP(and, and, &) PERCPU_OP(or, or, |) #undef PERCPU_OP -static __always_inline unsigned long __percpu_read(void *ptr, int size) +static __always_inline unsigned long __percpu_read(void __percpu *ptr, int size) { unsigned long ret; @@ -100,7 +100,7 @@ static __always_inline unsigned long __percpu_read(void *ptr, int size) return ret; } -static __always_inline void __percpu_write(void *ptr, unsigned long val, int size) +static __always_inline void __percpu_write(void __percpu *ptr, unsigned long val, int size) { switch (size) { case 1: @@ -132,8 +132,7 @@ static __always_inline void __percpu_write(void *ptr, unsigned long val, int siz } } -static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, - int size) +static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, int size) { switch (size) { case 1: From 902d75cdf0cf0a3fb58550089ee519abf12566f5 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 21 Nov 2023 15:03:25 +0800 Subject: [PATCH 116/234] LoongArch: Silence the boot warning about 'nokaslr' The kernel parameter 'nokaslr' is handled before start_kernel(), so we don't need early_param() to mark it technically. But it can cause a boot warning as follows: Unknown kernel command line parameters "nokaslr", will be passed to user space. When we use 'init=/bin/bash', 'nokaslr' which passed to user space will even cause a kernel panic. So we use early_param() to mark 'nokaslr', simply print a notice and silence the boot warning (also fix a potential panic). This logic is similar to RISC-V. Signed-off-by: Huacai Chen --- arch/loongarch/kernel/relocate.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c index 288b739ca88d..1acfa704c8d0 100644 --- a/arch/loongarch/kernel/relocate.c +++ b/arch/loongarch/kernel/relocate.c @@ -102,6 +102,14 @@ static inline __init unsigned long get_random_boot(void) return hash; } +static int __init nokaslr(char *p) +{ + pr_info("KASLR is disabled.\n"); + + return 0; /* Print a notice and silence the boot warning */ +} +early_param("nokaslr", nokaslr); + static inline __init bool kaslr_disabled(void) { char *str; From 19d86a496233731882aea7ec24505ce6641b1c0c Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 21 Nov 2023 15:03:25 +0800 Subject: [PATCH 117/234] LoongArch: Mark {dmw,tlb}_virt_to_page() exports as non-GPL Mark {dmw,tlb}_virt_to_page() exports as non-GPL, in order to let out-of-tree modules (e.g. OpenZFS) be built without errors. Otherwise we get: ERROR: modpost: GPL-incompatible module zfs.ko uses GPL-only symbol 'dmw_virt_to_page' ERROR: modpost: GPL-incompatible module zfs.ko uses GPL-only symbol 'tlb_virt_to_page' Reported-by: Haowu Ge Signed-off-by: Huacai Chen --- arch/loongarch/mm/pgtable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c index 71d0539e2d0b..2aae72e63871 100644 --- a/arch/loongarch/mm/pgtable.c +++ b/arch/loongarch/mm/pgtable.c @@ -13,13 +13,13 @@ struct page *dmw_virt_to_page(unsigned long kaddr) { return pfn_to_page(virt_to_pfn(kaddr)); } -EXPORT_SYMBOL_GPL(dmw_virt_to_page); +EXPORT_SYMBOL(dmw_virt_to_page); struct page *tlb_virt_to_page(unsigned long kaddr) { return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr))); } -EXPORT_SYMBOL_GPL(tlb_virt_to_page); +EXPORT_SYMBOL(tlb_virt_to_page); pgd_t *pgd_alloc(struct mm_struct *mm) { From d43f37b73468c172bc89ac4824a1511b411f0778 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Tue, 21 Nov 2023 15:03:25 +0800 Subject: [PATCH 118/234] LoongArch: Implement constant timer shutdown interface When a cpu is hot-unplugged, it is put in idle state and the function arch_cpu_idle_dead() is called. The timer interrupt for this processor should be disabled, otherwise there will be pending timer interrupt for the unplugged cpu, so that vcpu is prevented from giving up scheduling when system is running in vm mode. This patch implements the timer shutdown interface so that the constant timer will be properly disabled when a CPU is hot-unplugged. Reviewed-by: WANG Xuerui Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kernel/time.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index 3064af94db9c..e7015f7b70e3 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -58,21 +58,6 @@ static int constant_set_state_oneshot(struct clock_event_device *evt) return 0; } -static int constant_set_state_oneshot_stopped(struct clock_event_device *evt) -{ - unsigned long timer_config; - - raw_spin_lock(&state_lock); - - timer_config = csr_read64(LOONGARCH_CSR_TCFG); - timer_config &= ~CSR_TCFG_EN; - csr_write64(timer_config, LOONGARCH_CSR_TCFG); - - raw_spin_unlock(&state_lock); - - return 0; -} - static int constant_set_state_periodic(struct clock_event_device *evt) { unsigned long period; @@ -92,6 +77,16 @@ static int constant_set_state_periodic(struct clock_event_device *evt) static int constant_set_state_shutdown(struct clock_event_device *evt) { + unsigned long timer_config; + + raw_spin_lock(&state_lock); + + timer_config = csr_read64(LOONGARCH_CSR_TCFG); + timer_config &= ~CSR_TCFG_EN; + csr_write64(timer_config, LOONGARCH_CSR_TCFG); + + raw_spin_unlock(&state_lock); + return 0; } @@ -161,7 +156,7 @@ int constant_clockevent_init(void) cd->rating = 320; cd->cpumask = cpumask_of(cpu); cd->set_state_oneshot = constant_set_state_oneshot; - cd->set_state_oneshot_stopped = constant_set_state_oneshot_stopped; + cd->set_state_oneshot_stopped = constant_set_state_shutdown; cd->set_state_periodic = constant_set_state_periodic; cd->set_state_shutdown = constant_set_state_shutdown; cd->set_next_event = constant_timer_next_event; From 10301780c96e70b694eca86a3ccfa1893a4a6d3e Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Tue, 21 Nov 2023 15:03:25 +0800 Subject: [PATCH 119/234] Docs/LoongArch: Update links in LoongArch introduction.rst LoongArch-Vol1 has been updated to v1.10, the links in the documentation are out of date, let's update it. Signed-off-by: Yanteng Si Signed-off-by: Huacai Chen --- Documentation/arch/loongarch/introduction.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/arch/loongarch/introduction.rst b/Documentation/arch/loongarch/introduction.rst index 8c568cfc2107..5e6db78abeaf 100644 --- a/Documentation/arch/loongarch/introduction.rst +++ b/Documentation/arch/loongarch/introduction.rst @@ -375,9 +375,9 @@ Developer web site of Loongson and LoongArch (Software and Documentation): Documentation of LoongArch ISA: - https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-CN.pdf (in Chinese) + https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-CN.pdf (in Chinese) - https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-EN.pdf (in English) + https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-EN.pdf (in English) Documentation of LoongArch ELF psABI: From c517fd2738f472eb0d1db60a70d91629349a9bf8 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Tue, 21 Nov 2023 15:03:26 +0800 Subject: [PATCH 120/234] Docs/zh_CN/LoongArch: Update links in LoongArch introduction.rst LoongArch-Vol1 has been updated to v1.10, the links in the documentation are out of date, let's update it. Signed-off-by: Yanteng Si Signed-off-by: Huacai Chen --- .../translations/zh_CN/arch/loongarch/introduction.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/translations/zh_CN/arch/loongarch/introduction.rst b/Documentation/translations/zh_CN/arch/loongarch/introduction.rst index 59d6bf33050c..bf463c5a4c51 100644 --- a/Documentation/translations/zh_CN/arch/loongarch/introduction.rst +++ b/Documentation/translations/zh_CN/arch/loongarch/introduction.rst @@ -338,9 +338,9 @@ Loongson与LoongArch的开发者网站(软件与文档资源): LoongArch指令集架构的文档: - https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-CN.pdf (中文版) + https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-CN.pdf (中文版) - https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-EN.pdf (英文版) + https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-EN.pdf (英文版) LoongArch的ELF psABI文档: From 8d9ce3e53bbd2d6241213e9a4deb310499c929ff Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Nov 2023 16:45:45 +0100 Subject: [PATCH 121/234] MAINTAINERS: Drop Mark Gross as maintainer for x86 platform drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark has not really been active as maintainer for x86 platform drivers lately, drop Mark from the MAINTAINERS entries for drivers/platform/x86, drivers/platform/mellanox and drivers/platform/surface. Cc: Mark Gross Signed-off-by: Hans de Goede Acked-by: Andy Shevchenko Link: https://lore.kernel.org/r/20231120154548.611041-1-hdegoede@redhat.com Signed-off-by: Ilpo Järvinen --- MAINTAINERS | 3 --- 1 file changed, 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0666ddb65552..e972ce5f54e7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13778,7 +13778,6 @@ F: drivers/net/ethernet/mellanox/mlxfw/ MELLANOX HARDWARE PLATFORM SUPPORT M: Hans de Goede M: Ilpo Järvinen -M: Mark Gross M: Vadim Pasternak L: platform-driver-x86@vger.kernel.org S: Supported @@ -14387,7 +14386,6 @@ F: drivers/platform/surface/surface_gpe.c MICROSOFT SURFACE HARDWARE PLATFORM SUPPORT M: Hans de Goede M: Ilpo Järvinen -M: Mark Gross M: Maximilian Luz L: platform-driver-x86@vger.kernel.org S: Maintained @@ -23653,7 +23651,6 @@ F: drivers/platform/x86/x86-android-tablets/ X86 PLATFORM DRIVERS M: Hans de Goede M: Ilpo Järvinen -M: Mark Gross L: platform-driver-x86@vger.kernel.org S: Maintained Q: https://patchwork.kernel.org/project/platform-driver-x86/list/ From a6584711e64d9d12ab79a450ec3628fd35e4f476 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 20 Nov 2023 17:07:56 +0200 Subject: [PATCH 122/234] platform/x86: intel_telemetry: Fix kernel doc descriptions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LKP found issues with a kernel doc in the driver: core.c:116: warning: Function parameter or member 'ioss_evtconfig' not described in 'telemetry_update_events' core.c:188: warning: Function parameter or member 'ioss_evtconfig' not described in 'telemetry_get_eventconfig' It looks like it were copy'n'paste typos when these descriptions had been introduced. Fix the typos. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202310070743.WALmRGSY-lkp@intel.com/ Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20231120150756.1661425-1-andriy.shevchenko@linux.intel.com Reviewed-by: Rajneesh Bhardwaj Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/telemetry/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel/telemetry/core.c b/drivers/platform/x86/intel/telemetry/core.c index fdf55b5d6948..e4be40f73eeb 100644 --- a/drivers/platform/x86/intel/telemetry/core.c +++ b/drivers/platform/x86/intel/telemetry/core.c @@ -102,7 +102,7 @@ static const struct telemetry_core_ops telm_defpltops = { /** * telemetry_update_events() - Update telemetry Configuration * @pss_evtconfig: PSS related config. No change if num_evts = 0. - * @pss_evtconfig: IOSS related config. No change if num_evts = 0. + * @ioss_evtconfig: IOSS related config. No change if num_evts = 0. * * This API updates the IOSS & PSS Telemetry configuration. Old config * is overwritten. Call telemetry_reset_events when logging is over @@ -176,7 +176,7 @@ EXPORT_SYMBOL_GPL(telemetry_reset_events); /** * telemetry_get_eventconfig() - Returns the pss and ioss events enabled * @pss_evtconfig: Pointer to PSS related configuration. - * @pss_evtconfig: Pointer to IOSS related configuration. + * @ioss_evtconfig: Pointer to IOSS related configuration. * @pss_len: Number of u32 elements allocated for pss_evtconfig array * @ioss_len: Number of u32 elements allocated for ioss_evtconfig array * From 3f7c0634926daf48cd2f6db6c1197a1047074088 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Wed, 15 Nov 2023 12:10:04 +0100 Subject: [PATCH 123/234] accel/ivpu/37xx: Fix hangs related to MMIO reset There is no need to call MMIO reset using VPU_37XX_BUTTRESS_VPU_IP_RESET register. IP will be reset by FLR or by entering d0i3. Also IP reset during power_up is not needed as the VPU is already in reset. Removing MMIO reset improves stability as it a partial device reset that is not safe in some corner cases. This change also brings back ivpu_boot_pwr_domain_disable() that helps to properly power down VPU when it is hung by a buggy workload. Signed-off-by: Jacek Lawrynowicz Fixes: 828d63042aec ("accel/ivpu: Don't enter d0i3 during FLR") Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20231115111004.1304092-1-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_hw_37xx.c | 46 +++++++++++++++---------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c index 5c0246b9e522..4ccf1994b97a 100644 --- a/drivers/accel/ivpu/ivpu_hw_37xx.c +++ b/drivers/accel/ivpu/ivpu_hw_37xx.c @@ -502,6 +502,16 @@ static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev) return ret; } +static int ivpu_boot_pwr_domain_disable(struct ivpu_device *vdev) +{ + ivpu_boot_dpu_active_drive(vdev, false); + ivpu_boot_pwr_island_isolation_drive(vdev, true); + ivpu_boot_pwr_island_trickle_drive(vdev, false); + ivpu_boot_pwr_island_drive(vdev, false); + + return ivpu_boot_wait_for_pwr_island_status(vdev, 0x0); +} + static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev) { u32 val = REGV_RD32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES); @@ -600,25 +610,17 @@ static int ivpu_hw_37xx_info_init(struct ivpu_device *vdev) static int ivpu_hw_37xx_reset(struct ivpu_device *vdev) { - int ret; - u32 val; + int ret = 0; - if (IVPU_WA(punit_disabled)) - return 0; - - ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); - if (ret) { - ivpu_err(vdev, "Timed out waiting for TRIGGER bit\n"); - return ret; + if (ivpu_boot_pwr_domain_disable(vdev)) { + ivpu_err(vdev, "Failed to disable power domain\n"); + ret = -EIO; } - val = REGB_RD32(VPU_37XX_BUTTRESS_VPU_IP_RESET); - val = REG_SET_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, val); - REGB_WR32(VPU_37XX_BUTTRESS_VPU_IP_RESET, val); - - ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); - if (ret) - ivpu_err(vdev, "Timed out waiting for RESET completion\n"); + if (ivpu_pll_disable(vdev)) { + ivpu_err(vdev, "Failed to disable PLL\n"); + ret = -EIO; + } return ret; } @@ -651,10 +653,6 @@ static int ivpu_hw_37xx_power_up(struct ivpu_device *vdev) { int ret; - ret = ivpu_hw_37xx_reset(vdev); - if (ret) - ivpu_warn(vdev, "Failed to reset HW: %d\n", ret); - ret = ivpu_hw_37xx_d0i3_disable(vdev); if (ret) ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret); @@ -722,11 +720,11 @@ static int ivpu_hw_37xx_power_down(struct ivpu_device *vdev) { int ret = 0; - if (!ivpu_hw_37xx_is_idle(vdev) && ivpu_hw_37xx_reset(vdev)) - ivpu_err(vdev, "Failed to reset the VPU\n"); + if (!ivpu_hw_37xx_is_idle(vdev)) + ivpu_warn(vdev, "VPU not idle during power down\n"); - if (ivpu_pll_disable(vdev)) { - ivpu_err(vdev, "Failed to disable PLL\n"); + if (ivpu_hw_37xx_reset(vdev)) { + ivpu_err(vdev, "Failed to reset VPU\n"); ret = -EIO; } From f2d4a5834638bbc967371b9168c0b481519f7c5e Mon Sep 17 00:00:00 2001 From: Hamish Martin Date: Wed, 25 Oct 2023 16:55:10 +1300 Subject: [PATCH 124/234] HID: mcp2221: Set driver data before I2C adapter add The process of adding an I2C adapter can invoke I2C accesses on that new adapter (see i2c_detect()). Ensure we have set the adapter's driver data to avoid null pointer dereferences in the xfer functions during the adapter add. This has been noted in the past and the same fix proposed but not completed. See: https://lore.kernel.org/lkml/ef597e73-ed71-168e-52af-0d19b03734ac@vigem.de/ Signed-off-by: Hamish Martin Signed-off-by: Jiri Kosina --- drivers/hid/hid-mcp2221.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-mcp2221.c b/drivers/hid/hid-mcp2221.c index 72883e0ce757..b95f31cf0fa2 100644 --- a/drivers/hid/hid-mcp2221.c +++ b/drivers/hid/hid-mcp2221.c @@ -1157,12 +1157,12 @@ static int mcp2221_probe(struct hid_device *hdev, snprintf(mcp->adapter.name, sizeof(mcp->adapter.name), "MCP2221 usb-i2c bridge"); + i2c_set_adapdata(&mcp->adapter, mcp); ret = devm_i2c_add_adapter(&hdev->dev, &mcp->adapter); if (ret) { hid_err(hdev, "can't add usb-i2c adapter: %d\n", ret); return ret; } - i2c_set_adapdata(&mcp->adapter, mcp); #if IS_REACHABLE(CONFIG_GPIOLIB) /* Setup GPIO chip */ From 73ce9f1f2741a38f5d27393e627702ae2c46e6f2 Mon Sep 17 00:00:00 2001 From: Hamish Martin Date: Wed, 25 Oct 2023 16:55:11 +1300 Subject: [PATCH 125/234] HID: mcp2221: Allow IO to start during probe During the probe we add an I2C adapter and as soon as we add that adapter it may be used for a transfer (e.g via the code in i2cdetect()). Those transfers are not able to complete and time out. This is because the HID raw_event callback (mcp2221_raw_event) will not be invoked until the HID device's 'driver_input_lock' is marked up at the completion of the probe in hid_device_probe(). This starves the driver of the responses it is waiting for. In order to allow the I2C transfers to complete while we are still in the probe, start the IO once we have completed init of the HID device. This issue seems to have been seen before and a patch was submitted but it seems it was never accepted. See: https://lore.kernel.org/all/20221103222714.21566-3-Enrik.Berkhan@inka.de/ Signed-off-by: Hamish Martin Signed-off-by: Jiri Kosina --- drivers/hid/hid-mcp2221.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-mcp2221.c b/drivers/hid/hid-mcp2221.c index b95f31cf0fa2..aef0785c91cc 100644 --- a/drivers/hid/hid-mcp2221.c +++ b/drivers/hid/hid-mcp2221.c @@ -1142,6 +1142,8 @@ static int mcp2221_probe(struct hid_device *hdev, if (ret) return ret; + hid_device_io_start(hdev); + /* Set I2C bus clock diviser */ if (i2c_clk_freq > 400) i2c_clk_freq = 400; From 113f736655e4f20633e107d731dd5bd097d5938c Mon Sep 17 00:00:00 2001 From: Yihong Cao Date: Mon, 30 Oct 2023 01:05:38 +0800 Subject: [PATCH 126/234] HID: apple: add Jamesdonkey and A3R to non-apple keyboards list Jamesdonkey A3R keyboard is identified as "Jamesdonkey A3R" in wired mode, "A3R-U" in wireless mode and "A3R" in bluetooth mode. Adding them to non-apple keyboards fixes function key. Signed-off-by: Yihong Cao Signed-off-by: Jiri Kosina --- drivers/hid/hid-apple.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 3ca45975c686..d9e9829b2200 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -345,6 +345,8 @@ static const struct apple_non_apple_keyboard non_apple_keyboards[] = { { "AONE" }, { "GANSS" }, { "Hailuck" }, + { "Jamesdonkey" }, + { "A3R" }, }; static bool apple_is_non_apple_keyboard(struct hid_device *hdev) From fc43e9c857b7aa55efba9398419b14d9e35dcc7d Mon Sep 17 00:00:00 2001 From: Charles Yi Date: Tue, 31 Oct 2023 12:32:39 +0800 Subject: [PATCH 127/234] HID: fix HID device resource race between HID core and debugging support hid_debug_events_release releases resources bound to the HID device instance. hid_device_release releases the underlying HID device instance potentially before hid_debug_events_release has completed releasing debug resources bound to the same HID device instance. Reference count to prevent the HID device instance from being torn down preemptively when HID debugging support is used. When count reaches zero, release core resources of HID device instance using hiddev_free. The crash: [ 120.728477][ T4396] kernel BUG at lib/list_debug.c:53! [ 120.728505][ T4396] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [ 120.739806][ T4396] Modules linked in: bcmdhd dhd_static_buf 8822cu pcie_mhi r8168 [ 120.747386][ T4396] CPU: 1 PID: 4396 Comm: hidt_bridge Not tainted 5.10.110 #257 [ 120.754771][ T4396] Hardware name: Rockchip RK3588 EVB4 LP4 V10 Board (DT) [ 120.761643][ T4396] pstate: 60400089 (nZCv daIf +PAN -UAO -TCO BTYPE=--) [ 120.768338][ T4396] pc : __list_del_entry_valid+0x98/0xac [ 120.773730][ T4396] lr : __list_del_entry_valid+0x98/0xac [ 120.779120][ T4396] sp : ffffffc01e62bb60 [ 120.783126][ T4396] x29: ffffffc01e62bb60 x28: ffffff818ce3a200 [ 120.789126][ T4396] x27: 0000000000000009 x26: 0000000000980000 [ 120.795126][ T4396] x25: ffffffc012431000 x24: ffffff802c6d4e00 [ 120.801125][ T4396] x23: ffffff8005c66f00 x22: ffffffc01183b5b8 [ 120.807125][ T4396] x21: ffffff819df2f100 x20: 0000000000000000 [ 120.813124][ T4396] x19: ffffff802c3f0700 x18: ffffffc01d2cd058 [ 120.819124][ T4396] x17: 0000000000000000 x16: 0000000000000000 [ 120.825124][ T4396] x15: 0000000000000004 x14: 0000000000003fff [ 120.831123][ T4396] x13: ffffffc012085588 x12: 0000000000000003 [ 120.837123][ T4396] x11: 00000000ffffbfff x10: 0000000000000003 [ 120.843123][ T4396] x9 : 455103d46b329300 x8 : 455103d46b329300 [ 120.849124][ T4396] x7 : 74707572726f6320 x6 : ffffffc0124b8cb5 [ 120.855124][ T4396] x5 : ffffffffffffffff x4 : 0000000000000000 [ 120.861123][ T4396] x3 : ffffffc011cf4f90 x2 : ffffff81fee7b948 [ 120.867122][ T4396] x1 : ffffffc011cf4f90 x0 : 0000000000000054 [ 120.873122][ T4396] Call trace: [ 120.876259][ T4396] __list_del_entry_valid+0x98/0xac [ 120.881304][ T4396] hid_debug_events_release+0x48/0x12c [ 120.886617][ T4396] full_proxy_release+0x50/0xbc [ 120.891323][ T4396] __fput+0xdc/0x238 [ 120.895075][ T4396] ____fput+0x14/0x24 [ 120.898911][ T4396] task_work_run+0x90/0x148 [ 120.903268][ T4396] do_exit+0x1bc/0x8a4 [ 120.907193][ T4396] do_group_exit+0x8c/0xa4 [ 120.911458][ T4396] get_signal+0x468/0x744 [ 120.915643][ T4396] do_signal+0x84/0x280 [ 120.919650][ T4396] do_notify_resume+0xd0/0x218 [ 120.924262][ T4396] work_pending+0xc/0x3f0 [ Rahul Rameshbabu : rework changelog ] Fixes: cd667ce24796 ("HID: use debugfs for events/reports dumping") Signed-off-by: Charles Yi Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 12 ++++++++++-- drivers/hid/hid-debug.c | 3 +++ include/linux/hid.h | 3 +++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 8992e3c1e769..e0181218ad85 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -702,15 +702,22 @@ static void hid_close_report(struct hid_device *device) * Free a device structure, all reports, and all fields. */ -static void hid_device_release(struct device *dev) +void hiddev_free(struct kref *ref) { - struct hid_device *hid = to_hid_device(dev); + struct hid_device *hid = container_of(ref, struct hid_device, ref); hid_close_report(hid); kfree(hid->dev_rdesc); kfree(hid); } +static void hid_device_release(struct device *dev) +{ + struct hid_device *hid = to_hid_device(dev); + + kref_put(&hid->ref, hiddev_free); +} + /* * Fetch a report description item from the data stream. We support long * items, though they are not used yet. @@ -2846,6 +2853,7 @@ struct hid_device *hid_allocate_device(void) spin_lock_init(&hdev->debug_list_lock); sema_init(&hdev->driver_input_lock, 1); mutex_init(&hdev->ll_open_lock); + kref_init(&hdev->ref); hid_bpf_device_init(hdev); diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index e7ef1ea107c9..7dd83ec74f8a 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -1135,6 +1135,7 @@ static int hid_debug_events_open(struct inode *inode, struct file *file) goto out; } list->hdev = (struct hid_device *) inode->i_private; + kref_get(&list->hdev->ref); file->private_data = list; mutex_init(&list->read_mutex); @@ -1227,6 +1228,8 @@ static int hid_debug_events_release(struct inode *inode, struct file *file) list_del(&list->node); spin_unlock_irqrestore(&list->hdev->debug_list_lock, flags); kfifo_free(&list->hid_debug_fifo); + + kref_put(&list->hdev->ref, hiddev_free); kfree(list); return 0; diff --git a/include/linux/hid.h b/include/linux/hid.h index 5a8387a4a712..bf43f3ff6664 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -679,6 +679,7 @@ struct hid_device { /* device report descriptor */ struct list_head debug_list; spinlock_t debug_list_lock; wait_queue_head_t debug_wait; + struct kref ref; unsigned int id; /* system unique id */ @@ -687,6 +688,8 @@ struct hid_device { /* device report descriptor */ #endif /* CONFIG_BPF */ }; +void hiddev_free(struct kref *ref); + #define to_hid_device(pdev) \ container_of(pdev, struct hid_device, dev) From a5e913c25b6b2b6ae02acef6d9400645ac03dfdf Mon Sep 17 00:00:00 2001 From: Brett Raye Date: Thu, 2 Nov 2023 18:10:38 -0700 Subject: [PATCH 128/234] HID: glorious: fix Glorious Model I HID report The Glorious Model I mouse has a buggy HID report descriptor for its keyboard endpoint (used for programmable buttons). For report ID 2, there is a mismatch between Logical Minimum and Usage Minimum in the array that reports keycodes. The offending portion of the descriptor: (from hid-decode) 0x95, 0x05, // Report Count (5) 30 0x75, 0x08, // Report Size (8) 32 0x15, 0x00, // Logical Minimum (0) 34 0x25, 0x65, // Logical Maximum (101) 36 0x05, 0x07, // Usage Page (Keyboard) 38 0x19, 0x01, // Usage Minimum (1) 40 0x29, 0x65, // Usage Maximum (101) 42 0x81, 0x00, // Input (Data,Arr,Abs) 44 This bug shifts all programmed keycodes up by 1. Importantly, this causes "empty" array indexes of 0x00 to be interpreted as 0x01, ErrorRollOver. The presence of ErrorRollOver causes the system to ignore all keypresses from the endpoint and breaks the ability to use the programmable buttons. Setting byte 41 to 0x00 fixes this, and causes keycodes to be interpreted correctly. Also, USB_VENDOR_ID_GLORIOUS is changed to USB_VENDOR_ID_SINOWEALTH, and a new ID for Laview Technology is added. Glorious seems to be white-labeling controller boards or mice from these vendors. There isn't a single canonical vendor ID for Glorious products. Signed-off-by: Brett Raye Signed-off-by: Jiri Kosina --- drivers/hid/hid-glorious.c | 16 ++++++++++++++-- drivers/hid/hid-ids.h | 11 +++++++---- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/hid/hid-glorious.c b/drivers/hid/hid-glorious.c index 558eb08c19ef..281b3a7187ce 100644 --- a/drivers/hid/hid-glorious.c +++ b/drivers/hid/hid-glorious.c @@ -21,6 +21,10 @@ MODULE_DESCRIPTION("HID driver for Glorious PC Gaming Race mice"); * Glorious Model O and O- specify the const flag in the consumer input * report descriptor, which leads to inputs being ignored. Fix this * by patching the descriptor. + * + * Glorious Model I incorrectly specifes the Usage Minimum for its + * keyboard HID report, causing keycodes to be misinterpreted. + * Fix this by setting Usage Minimum to 0 in that report. */ static __u8 *glorious_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) @@ -32,6 +36,10 @@ static __u8 *glorious_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc[85] = rdesc[113] = rdesc[141] = \ HID_MAIN_ITEM_VARIABLE | HID_MAIN_ITEM_RELATIVE; } + if (*rsize == 156 && rdesc[41] == 1) { + hid_info(hdev, "patching Glorious Model I keyboard report descriptor\n"); + rdesc[41] = 0; + } return rdesc; } @@ -44,6 +52,8 @@ static void glorious_update_name(struct hid_device *hdev) model = "Model O"; break; case USB_DEVICE_ID_GLORIOUS_MODEL_D: model = "Model D"; break; + case USB_DEVICE_ID_GLORIOUS_MODEL_I: + model = "Model I"; break; } snprintf(hdev->name, sizeof(hdev->name), "%s %s", "Glorious", model); @@ -66,10 +76,12 @@ static int glorious_probe(struct hid_device *hdev, } static const struct hid_device_id glorious_devices[] = { - { HID_USB_DEVICE(USB_VENDOR_ID_GLORIOUS, + { HID_USB_DEVICE(USB_VENDOR_ID_SINOWEALTH, USB_DEVICE_ID_GLORIOUS_MODEL_O) }, - { HID_USB_DEVICE(USB_VENDOR_ID_GLORIOUS, + { HID_USB_DEVICE(USB_VENDOR_ID_SINOWEALTH, USB_DEVICE_ID_GLORIOUS_MODEL_D) }, + { HID_USB_DEVICE(USB_VENDOR_ID_LAVIEW, + USB_DEVICE_ID_GLORIOUS_MODEL_I) }, { } }; MODULE_DEVICE_TABLE(hid, glorious_devices); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index f7973ccd84a2..125c73a20051 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -511,10 +511,6 @@ #define USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_010A 0x010a #define USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_E100 0xe100 -#define USB_VENDOR_ID_GLORIOUS 0x258a -#define USB_DEVICE_ID_GLORIOUS_MODEL_D 0x0033 -#define USB_DEVICE_ID_GLORIOUS_MODEL_O 0x0036 - #define I2C_VENDOR_ID_GOODIX 0x27c6 #define I2C_DEVICE_ID_GOODIX_01F0 0x01f0 @@ -745,6 +741,9 @@ #define USB_VENDOR_ID_LABTEC 0x1020 #define USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD 0x0006 +#define USB_VENDOR_ID_LAVIEW 0x22D4 +#define USB_DEVICE_ID_GLORIOUS_MODEL_I 0x1503 + #define USB_VENDOR_ID_LCPOWER 0x1241 #define USB_DEVICE_ID_LCPOWER_LC1000 0xf767 @@ -1160,6 +1159,10 @@ #define USB_VENDOR_ID_SIGMATEL 0x066F #define USB_DEVICE_ID_SIGMATEL_STMP3780 0x3780 +#define USB_VENDOR_ID_SINOWEALTH 0x258a +#define USB_DEVICE_ID_GLORIOUS_MODEL_D 0x0033 +#define USB_DEVICE_ID_GLORIOUS_MODEL_O 0x0036 + #define USB_VENDOR_ID_SIS_TOUCH 0x0457 #define USB_DEVICE_ID_SIS9200_TOUCH 0x9200 #define USB_DEVICE_ID_SIS817_TOUCH 0x0817 From 8d6ef26501b97243ee6c16b8187c5b38cb69b77d Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 16 Nov 2023 14:02:12 +0100 Subject: [PATCH 129/234] drm/ast: Disconnect BMC if physical connector is connected Many user-space compositors fail with mode setting if a CRTC has more than one connected connector. This is the case with the BMC on Aspeed systems. Work around this problem by setting the BMC's connector status to disconnected when the physical connector has a display attached. This way compositors will only see one connected connector at a time; either the physical one or the BMC. Suggested-by: Jocelyn Falempe Fixes: e329cb53b45d ("drm/ast: Add BMC virtual connector") Signed-off-by: Thomas Zimmermann Cc: # v6.6+ Reviewed-by: Jocelyn Falempe Link: https://patchwork.freedesktop.org/patch/msgid/20231116130217.22931-1-tzimmermann@suse.de --- drivers/gpu/drm/ast/ast_drv.h | 13 ++++++- drivers/gpu/drm/ast/ast_mode.c | 62 ++++++++++++++++++++++++++++++---- 2 files changed, 67 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index 2aee32344f4a..772f3b049c16 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -174,6 +174,17 @@ to_ast_sil164_connector(struct drm_connector *connector) return container_of(connector, struct ast_sil164_connector, base); } +struct ast_bmc_connector { + struct drm_connector base; + struct drm_connector *physical_connector; +}; + +static inline struct ast_bmc_connector * +to_ast_bmc_connector(struct drm_connector *connector) +{ + return container_of(connector, struct ast_bmc_connector, base); +} + /* * Device */ @@ -218,7 +229,7 @@ struct ast_device { } astdp; struct { struct drm_encoder encoder; - struct drm_connector connector; + struct ast_bmc_connector bmc_connector; } bmc; } output; diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index cb9614984285..c20534d0ef7c 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -1767,6 +1767,30 @@ static const struct drm_encoder_funcs ast_bmc_encoder_funcs = { .destroy = drm_encoder_cleanup, }; +static int ast_bmc_connector_helper_detect_ctx(struct drm_connector *connector, + struct drm_modeset_acquire_ctx *ctx, + bool force) +{ + struct ast_bmc_connector *bmc_connector = to_ast_bmc_connector(connector); + struct drm_connector *physical_connector = bmc_connector->physical_connector; + + /* + * Most user-space compositors cannot handle more than one connected + * connector per CRTC. Hence, we only mark the BMC as connected if the + * physical connector is disconnected. If the physical connector's status + * is connected or unknown, the BMC remains disconnected. This has no + * effect on the output of the BMC. + * + * FIXME: Remove this logic once user-space compositors can handle more + * than one connector per CRTC. The BMC should always be connected. + */ + + if (physical_connector && physical_connector->status == connector_status_disconnected) + return connector_status_connected; + + return connector_status_disconnected; +} + static int ast_bmc_connector_helper_get_modes(struct drm_connector *connector) { return drm_add_modes_noedid(connector, 4096, 4096); @@ -1774,6 +1798,7 @@ static int ast_bmc_connector_helper_get_modes(struct drm_connector *connector) static const struct drm_connector_helper_funcs ast_bmc_connector_helper_funcs = { .get_modes = ast_bmc_connector_helper_get_modes, + .detect_ctx = ast_bmc_connector_helper_detect_ctx, }; static const struct drm_connector_funcs ast_bmc_connector_funcs = { @@ -1784,12 +1809,33 @@ static const struct drm_connector_funcs ast_bmc_connector_funcs = { .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; -static int ast_bmc_output_init(struct ast_device *ast) +static int ast_bmc_connector_init(struct drm_device *dev, + struct ast_bmc_connector *bmc_connector, + struct drm_connector *physical_connector) +{ + struct drm_connector *connector = &bmc_connector->base; + int ret; + + ret = drm_connector_init(dev, connector, &ast_bmc_connector_funcs, + DRM_MODE_CONNECTOR_VIRTUAL); + if (ret) + return ret; + + drm_connector_helper_add(connector, &ast_bmc_connector_helper_funcs); + + bmc_connector->physical_connector = physical_connector; + + return 0; +} + +static int ast_bmc_output_init(struct ast_device *ast, + struct drm_connector *physical_connector) { struct drm_device *dev = &ast->base; struct drm_crtc *crtc = &ast->crtc; struct drm_encoder *encoder = &ast->output.bmc.encoder; - struct drm_connector *connector = &ast->output.bmc.connector; + struct ast_bmc_connector *bmc_connector = &ast->output.bmc.bmc_connector; + struct drm_connector *connector = &bmc_connector->base; int ret; ret = drm_encoder_init(dev, encoder, @@ -1799,13 +1845,10 @@ static int ast_bmc_output_init(struct ast_device *ast) return ret; encoder->possible_crtcs = drm_crtc_mask(crtc); - ret = drm_connector_init(dev, connector, &ast_bmc_connector_funcs, - DRM_MODE_CONNECTOR_VIRTUAL); + ret = ast_bmc_connector_init(dev, bmc_connector, physical_connector); if (ret) return ret; - drm_connector_helper_add(connector, &ast_bmc_connector_helper_funcs); - ret = drm_connector_attach_encoder(connector, encoder); if (ret) return ret; @@ -1864,6 +1907,7 @@ static const struct drm_mode_config_funcs ast_mode_config_funcs = { int ast_mode_config_init(struct ast_device *ast) { struct drm_device *dev = &ast->base; + struct drm_connector *physical_connector = NULL; int ret; ret = drmm_mode_config_init(dev); @@ -1904,23 +1948,27 @@ int ast_mode_config_init(struct ast_device *ast) ret = ast_vga_output_init(ast); if (ret) return ret; + physical_connector = &ast->output.vga.vga_connector.base; } if (ast->tx_chip_types & AST_TX_SIL164_BIT) { ret = ast_sil164_output_init(ast); if (ret) return ret; + physical_connector = &ast->output.sil164.sil164_connector.base; } if (ast->tx_chip_types & AST_TX_DP501_BIT) { ret = ast_dp501_output_init(ast); if (ret) return ret; + physical_connector = &ast->output.dp501.connector; } if (ast->tx_chip_types & AST_TX_ASTDP_BIT) { ret = ast_astdp_output_init(ast); if (ret) return ret; + physical_connector = &ast->output.astdp.connector; } - ret = ast_bmc_output_init(ast); + ret = ast_bmc_output_init(ast, physical_connector); if (ret) return ret; From c55092187d9ad7b2f8f5a8645286fa03997d442f Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 14 Nov 2023 15:54:30 +0100 Subject: [PATCH 130/234] HID: add ALWAYS_POLL quirk for Apple kb These devices disconnect if suspended without remote wakeup. They can operate with the standard driver. Signed-off-by: Oliver Neukum Signed-off-by: Jiri Kosina --- drivers/hid/hid-quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 5a48fcaa32f0..ea472923fab0 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -33,6 +33,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_AKAI, USB_DEVICE_ID_AKAI_MPKMINI2), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_ALPS, USB_DEVICE_ID_IBM_GAMEPAD), HID_QUIRK_BADPAD }, { HID_USB_DEVICE(USB_VENDOR_ID_AMI, USB_DEVICE_ID_AMI_VIRT_KEYBOARD_AND_MOUSE), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_2PORTKVM), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_4PORTKVMC), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_4PORTKVM), HID_QUIRK_NOGET }, From c0e2926266af3b5acf28df0a8fc6e4d90effe0bb Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Sun, 19 Nov 2023 22:17:59 +0800 Subject: [PATCH 131/234] ipv4: Correct/silence an endian warning in __ip_do_redirect net/ipv4/route.c:783:46: warning: incorrect type in argument 2 (different base types) net/ipv4/route.c:783:46: expected unsigned int [usertype] key net/ipv4/route.c:783:46: got restricted __be32 [usertype] new_gw Fixes: 969447f226b4 ("ipv4: use new_gw for redirect neigh lookup") Suggested-by: Eric Dumazet Signed-off-by: Kunwu Chan Link: https://lore.kernel.org/r/20231119141759.420477-1-chentao@kylinos.cn Signed-off-by: Paolo Abeni --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3290a4442b4a..16615d107cf0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -780,7 +780,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow goto reject_redirect; } - n = __ipv4_neigh_lookup(rt->dst.dev, new_gw); + n = __ipv4_neigh_lookup(rt->dst.dev, (__force u32)new_gw); if (!n) n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev); if (!IS_ERR(n)) { From d30fb712e52964f2cf9a9c14cf67078394044837 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Sun, 19 Nov 2023 08:23:41 -0800 Subject: [PATCH 132/234] hv_netvsc: fix race of netvsc and VF register_netdevice The rtnl lock also needs to be held before rndis_filter_device_add() which advertises nvsp_2_vsc_capability / sriov bit, and triggers VF NIC offering and registering. If VF NIC finished register_netdev() earlier it may cause name based config failure. To fix this issue, move the call to rtnl_lock() before rndis_filter_device_add(), so VF will be registered later than netvsc / synthetic NIC, and gets a name numbered (ethX) after netvsc. Cc: stable@vger.kernel.org Fixes: e04e7a7bbd4b ("hv_netvsc: Fix a deadlock by getting rtnl lock earlier in netvsc_probe()") Reported-by: Dexuan Cui Signed-off-by: Haiyang Zhang Reviewed-by: Wojciech Drewek Reviewed-by: Simon Horman Reviewed-by: Dexuan Cui Signed-off-by: Paolo Abeni --- drivers/net/hyperv/netvsc_drv.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 3ba3c8fb28a5..5e528a76f5f5 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2531,6 +2531,21 @@ static int netvsc_probe(struct hv_device *dev, goto devinfo_failed; } + /* We must get rtnl lock before scheduling nvdev->subchan_work, + * otherwise netvsc_subchan_work() can get rtnl lock first and wait + * all subchannels to show up, but that may not happen because + * netvsc_probe() can't get rtnl lock and as a result vmbus_onoffer() + * -> ... -> device_add() -> ... -> __device_attach() can't get + * the device lock, so all the subchannels can't be processed -- + * finally netvsc_subchan_work() hangs forever. + * + * The rtnl lock also needs to be held before rndis_filter_device_add() + * which advertises nvsp_2_vsc_capability / sriov bit, and triggers + * VF NIC offering and registering. If VF NIC finished register_netdev() + * earlier it may cause name based config failure. + */ + rtnl_lock(); + nvdev = rndis_filter_device_add(dev, device_info); if (IS_ERR(nvdev)) { ret = PTR_ERR(nvdev); @@ -2540,16 +2555,6 @@ static int netvsc_probe(struct hv_device *dev, eth_hw_addr_set(net, device_info->mac_adr); - /* We must get rtnl lock before scheduling nvdev->subchan_work, - * otherwise netvsc_subchan_work() can get rtnl lock first and wait - * all subchannels to show up, but that may not happen because - * netvsc_probe() can't get rtnl lock and as a result vmbus_onoffer() - * -> ... -> device_add() -> ... -> __device_attach() can't get - * the device lock, so all the subchannels can't be processed -- - * finally netvsc_subchan_work() hangs forever. - */ - rtnl_lock(); - if (nvdev->num_chn > 1) schedule_work(&nvdev->subchan_work); @@ -2586,9 +2591,9 @@ static int netvsc_probe(struct hv_device *dev, return 0; register_failed: - rtnl_unlock(); rndis_filter_device_remove(dev, nvdev); rndis_failed: + rtnl_unlock(); netvsc_devinfo_put(device_info); devinfo_failed: free_percpu(net_device_ctx->vf_stats); From 85520856466ed6bc3b1ccb013cddac70ceb437db Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Sun, 19 Nov 2023 08:23:42 -0800 Subject: [PATCH 133/234] hv_netvsc: Fix race of register_netdevice_notifier and VF register If VF NIC is registered earlier, NETDEV_REGISTER event is replayed, but NETDEV_POST_INIT is not. Move register_netdevice_notifier() earlier, so the call back function is set before probing. Cc: stable@vger.kernel.org Fixes: e04e7a7bbd4b ("hv_netvsc: Fix a deadlock by getting rtnl lock earlier in netvsc_probe()") Reported-by: Dexuan Cui Signed-off-by: Haiyang Zhang Reviewed-by: Wojciech Drewek Reviewed-by: Dexuan Cui Signed-off-by: Paolo Abeni --- drivers/net/hyperv/netvsc_drv.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 5e528a76f5f5..b7dfd51f09e6 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2793,12 +2793,17 @@ static int __init netvsc_drv_init(void) } netvsc_ring_bytes = ring_size * PAGE_SIZE; + register_netdevice_notifier(&netvsc_netdev_notifier); + ret = vmbus_driver_register(&netvsc_drv); if (ret) - return ret; + goto err_vmbus_reg; - register_netdevice_notifier(&netvsc_netdev_notifier); return 0; + +err_vmbus_reg: + unregister_netdevice_notifier(&netvsc_netdev_notifier); + return ret; } MODULE_LICENSE("GPL"); From c807d6cd089d2f4951baa838081ec5ae3e2360f8 Mon Sep 17 00:00:00 2001 From: Long Li Date: Sun, 19 Nov 2023 08:23:43 -0800 Subject: [PATCH 134/234] hv_netvsc: Mark VF as slave before exposing it to user-mode When a VF is being exposed form the kernel, it should be marked as "slave" before exposing to the user-mode. The VF is not usable without netvsc running as master. The user-mode should never see a VF without the "slave" flag. This commit moves the code of setting the slave flag to the time before VF is exposed to user-mode. Cc: stable@vger.kernel.org Fixes: 0c195567a8f6 ("netvsc: transparent VF management") Signed-off-by: Long Li Signed-off-by: Haiyang Zhang Acked-by: Stephen Hemminger Acked-by: Dexuan Cui Signed-off-by: Paolo Abeni --- drivers/net/hyperv/netvsc_drv.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index b7dfd51f09e6..706ea5263e87 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2206,9 +2206,6 @@ static int netvsc_vf_join(struct net_device *vf_netdev, goto upper_link_failed; } - /* set slave flag before open to prevent IPv6 addrconf */ - vf_netdev->flags |= IFF_SLAVE; - schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); call_netdevice_notifiers(NETDEV_JOIN, vf_netdev); @@ -2315,16 +2312,18 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev) } - /* Fallback path to check synthetic vf with - * help of mac addr + /* Fallback path to check synthetic vf with help of mac addr. + * Because this function can be called before vf_netdev is + * initialized (NETDEV_POST_INIT) when its perm_addr has not been copied + * from dev_addr, also try to match to its dev_addr. + * Note: On Hyper-V and Azure, it's not possible to set a MAC address + * on a VF that matches to the MAC of a unrelated NETVSC device. */ list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) { ndev = hv_get_drvdata(ndev_ctx->device_ctx); - if (ether_addr_equal(vf_netdev->perm_addr, ndev->perm_addr)) { - netdev_notice(vf_netdev, - "falling back to mac addr based matching\n"); + if (ether_addr_equal(vf_netdev->perm_addr, ndev->perm_addr) || + ether_addr_equal(vf_netdev->dev_addr, ndev->perm_addr)) return ndev; - } } netdev_notice(vf_netdev, @@ -2332,6 +2331,19 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev) return NULL; } +static int netvsc_prepare_bonding(struct net_device *vf_netdev) +{ + struct net_device *ndev; + + ndev = get_netvsc_byslot(vf_netdev); + if (!ndev) + return NOTIFY_DONE; + + /* set slave flag before open to prevent IPv6 addrconf */ + vf_netdev->flags |= IFF_SLAVE; + return NOTIFY_DONE; +} + static int netvsc_register_vf(struct net_device *vf_netdev) { struct net_device_context *net_device_ctx; @@ -2758,6 +2770,8 @@ static int netvsc_netdev_event(struct notifier_block *this, return NOTIFY_DONE; switch (event) { + case NETDEV_POST_INIT: + return netvsc_prepare_bonding(event_dev); case NETDEV_REGISTER: return netvsc_register_vf(event_dev); case NETDEV_UNREGISTER: From e8df9d9f4209c04161321d8c12640ae560f65939 Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Tue, 21 Nov 2023 09:46:28 +0800 Subject: [PATCH 135/234] perf/x86/intel: Correct incorrect 'or' operation for PMU capabilities When running perf-stat command on Intel hybrid platform, perf-stat reports the following errors: sudo taskset -c 7 ./perf stat -vvvv -e cpu_atom/instructions/ sleep 1 Opening: cpu/cycles/:HG ------------------------------------------------------------ perf_event_attr: type 0 (PERF_TYPE_HARDWARE) config 0xa00000000 disabled 1 ------------------------------------------------------------ sys_perf_event_open: pid 0 cpu -1 group_fd -1 flags 0x8 sys_perf_event_open failed, error -16 Performance counter stats for 'sleep 1': cpu_atom/instructions/ It looks the cpu_atom/instructions/ event can't be enabled on atom PMU even when the process is pinned on atom core. Investigation shows that exclusive_event_init() helper always returns -EBUSY error in the perf event creation. That's strange since the atom PMU should not be an exclusive PMU. Further investigation shows the issue was introduced by commit: 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") The commit originally intents to clear the bit PERF_PMU_CAP_AUX_OUTPUT from PMU capabilities if intel_cap.pebs_output_pt_available is not set, but it incorrectly uses 'or' operation and leads to all PMU capabilities bits are set to 1 except bit PERF_PMU_CAP_AUX_OUTPUT. Testing this fix on Intel hybrid platforms, the observed issues disappear. Fixes: 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") Signed-off-by: Dapeng Mi Signed-off-by: Ingo Molnar Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231121014628.729989-1-dapeng1.mi@linux.intel.com --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a08f794a0e79..ce1c777227b4 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4660,7 +4660,7 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) if (pmu->intel_cap.pebs_output_pt_available) pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; else - pmu->pmu.capabilities |= ~PERF_PMU_CAP_AUX_OUTPUT; + pmu->pmu.capabilities &= ~PERF_PMU_CAP_AUX_OUTPUT; intel_pmu_check_event_constraints(pmu->event_constraints, pmu->num_counters, From 8771127e25d6c20d458ad27cf32f7fcfc1755e05 Mon Sep 17 00:00:00 2001 From: Lech Perczak Date: Sat, 18 Nov 2023 00:19:17 +0100 Subject: [PATCH 136/234] USB: serial: option: don't claim interface 4 for ZTE MF290 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Interface 4 is used by for QMI interface in stock firmware of MF28D, the router which uses MF290 modem. Free the interface up, to rebind it to qmi_wwan driver. The proper configuration is: Interface mapping is: 0: QCDM, 1: (unknown), 2: AT (PCUI), 2: AT (Modem), 4: QMI T: Bus=01 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=19d2 ProdID=0189 Rev= 0.00 S: Manufacturer=ZTE, Incorporated S: Product=ZTE LTE Technologies MSM C:* #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=84(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=86(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms Cc: Bjørn Mork Signed-off-by: Lech Perczak Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 45dcfaadaf98..ff9049db6e65 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1546,7 +1546,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0165, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0167, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, - { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0189, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0189, 0xff, 0xff, 0xff), + .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0191, 0xff, 0xff, 0xff), /* ZTE EuFi890 */ .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0196, 0xff, 0xff, 0xff) }, From a1092619dd28ac0fcf23016160a2fdccd98ef935 Mon Sep 17 00:00:00 2001 From: Puliang Lu Date: Thu, 26 Oct 2023 20:35:06 +0800 Subject: [PATCH 137/234] USB: serial: option: fix FM101R-GL defines Modify the definition of the two Fibocom FM101R-GL PID macros, which had their PIDs switched. The correct PIDs are: - VID:PID 413C:8213, FM101R-GL ESIM are laptop M.2 cards (with MBIM interfaces for Linux) - VID:PID 413C:8215, FM101R-GL are laptop M.2 cards (with MBIM interface for Linux) 0x8213: mbim, tty 0x8215: mbim, tty Signed-off-by: Puliang Lu Fixes: 52480e1f1a25 ("USB: serial: option: add Fibocom to DELL custom modem FM101R-GL") Link: https://lore.kernel.org/lkml/TYZPR02MB508845BAD7936A62A105CE5D89DFA@TYZPR02MB5088.apcprd02.prod.outlook.com/ Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index ff9049db6e65..9c76095ebfe1 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -203,8 +203,8 @@ static void option_instat_callback(struct urb *urb); #define DELL_PRODUCT_5829E_ESIM 0x81e4 #define DELL_PRODUCT_5829E 0x81e6 -#define DELL_PRODUCT_FM101R 0x8213 -#define DELL_PRODUCT_FM101R_ESIM 0x8215 +#define DELL_PRODUCT_FM101R_ESIM 0x8213 +#define DELL_PRODUCT_FM101R 0x8215 #define KYOCERA_VENDOR_ID 0x0c88 #define KYOCERA_PRODUCT_KPC650 0x17da From 5b4ffb176d7979ac66b349addf3f7de433335e00 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 21 Nov 2023 14:38:11 +0100 Subject: [PATCH 138/234] Revert "HID: logitech-dj: Add support for a new lightspeed receiver iteration" This reverts commit 9d1bd9346241cd6963b58da7ffb7ed303285f684. Multiple people reported misbehaving devices and reverting this commit fixes the problem for them. As soon as the original commit author starts reacting again, we can try to figure out why he hasn't seen the issues (mismatching report descriptors?), but for the time being, fix for 6.7 by reverting. Link: https://bugzilla.kernel.org/show_bug.cgi?id=218172 Link: https://bugzilla.kernel.org/show_bug.cgi?id=218094 Cc: Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 - drivers/hid/hid-logitech-dj.c | 11 +++-------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 125c73a20051..c6e4e0d1f214 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -868,7 +868,6 @@ #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_2 0xc534 #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1 0xc539 #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_1 0xc53f -#define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_2 0xc547 #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_POWERPLAY 0xc53a #define USB_DEVICE_ID_SPACETRAVELLER 0xc623 #define USB_DEVICE_ID_SPACENAVIGATOR 0xc626 diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index 8afe3be683ba..e6a8b6d8eab7 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -1695,12 +1695,11 @@ static int logi_dj_raw_event(struct hid_device *hdev, } /* * Mouse-only receivers send unnumbered mouse data. The 27 MHz - * receiver uses 6 byte packets, the nano receiver 8 bytes, - * the lightspeed receiver (Pro X Superlight) 13 bytes. + * receiver uses 6 byte packets, the nano receiver 8 bytes. */ if (djrcv_dev->unnumbered_application == HID_GD_MOUSE && - size <= 13){ - u8 mouse_report[14]; + size <= 8) { + u8 mouse_report[9]; /* Prepend report id */ mouse_report[0] = REPORT_TYPE_MOUSE; @@ -1984,10 +1983,6 @@ static const struct hid_device_id logi_dj_receivers[] = { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_1), .driver_data = recvr_type_gaming_hidpp}, - { /* Logitech lightspeed receiver (0xc547) */ - HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, - USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_2), - .driver_data = recvr_type_gaming_hidpp}, { /* Logitech 27 MHz HID++ 1.0 receiver (0xc513) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_MX3000_RECEIVER), From 9c6dc13106f2dd2d6819d66618b25a6f41f0ee6a Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 20 Nov 2023 08:28:40 +0000 Subject: [PATCH 139/234] MAINTAINERS: Add indirect_call_wrapper.h to NETWORKING [GENERAL] indirect_call_wrapper.h is not, strictly speaking, networking specific. However, it's git history indicates that in practice changes go through netdev and thus the netdev maintainers have effectively been taking responsibility for it. Formalise this by adding it to the NETWORKING [GENERAL] section in the MAINTAINERS file. It is not clear how many other files under include/linux fall into this category and it would be interesting, as a follow-up, to audit that and propose further updates to the MAINTAINERS file as appropriate. Link: https://lore.kernel.org/netdev/20231116010310.4664dd38@kernel.org/ Signed-off-by: Simon Horman Link: https://lore.kernel.org/r/20231120-indirect_call_wrapper-maintainer-v1-1-0a6bb1f7363e@kernel.org Signed-off-by: Paolo Abeni --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 80a95878c61d..6cc0708f77b4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15057,6 +15057,7 @@ F: Documentation/networking/ F: Documentation/process/maintainer-netdev.rst F: Documentation/userspace-api/netlink/ F: include/linux/in.h +F: include/linux/indirect_call_wrapper.h F: include/linux/net.h F: include/linux/netdevice.h F: include/net/ From 41058707bea93b979c4854bdb857e46f2b85df92 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 10 Nov 2023 14:48:02 +0100 Subject: [PATCH 140/234] dt-bindings: usb: hcd: add missing phy name to example The example host controller node has two PHYs and therefore needs two PHY names. Fixes: 3aa3c66aedef ("dt-bindings: usb: Bring back phy-names") Signed-off-by: Johan Hovold Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20231110134802.32060-1-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/usb-hcd.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/usb/usb-hcd.yaml b/Documentation/devicetree/bindings/usb/usb-hcd.yaml index 692dd60e3f73..45a19d4928af 100644 --- a/Documentation/devicetree/bindings/usb/usb-hcd.yaml +++ b/Documentation/devicetree/bindings/usb/usb-hcd.yaml @@ -41,7 +41,7 @@ examples: - | usb { phys = <&usb2_phy1>, <&usb3_phy1>; - phy-names = "usb"; + phy-names = "usb2", "usb3"; #address-cells = <1>; #size-cells = <0>; From a6fe37f428c19dd164c2111157d4a1029bd853aa Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 1 Nov 2023 02:19:09 +0000 Subject: [PATCH 141/234] usb: typec: tcpm: Skip hard reset when in error recovery Hard reset queued prior to error recovery (or) received during error recovery will make TCPM to prematurely exit error recovery sequence. Ignore hard resets received during error recovery (or) port reset sequence. ``` [46505.459688] state change SNK_READY -> ERROR_RECOVERY [rev3 NONE_AMS] [46505.459706] state change ERROR_RECOVERY -> PORT_RESET [rev3 NONE_AMS] [46505.460433] disable vbus discharge ret:0 [46505.461226] Setting usb_comm capable false [46505.467244] Setting voltage/current limit 0 mV 0 mA [46505.467262] polarity 0 [46505.470695] Requesting mux state 0, usb-role 0, orientation 0 [46505.475621] cc:=0 [46505.476012] pending state change PORT_RESET -> PORT_RESET_WAIT_OFF @ 100 ms [rev3 NONE_AMS] [46505.476020] Received hard reset [46505.476024] state change PORT_RESET -> HARD_RESET_START [rev3 HARD_RESET] ``` Cc: stable@vger.kernel.org Fixes: f0690a25a140 ("staging: typec: USB Type-C Port Manager (tcpm)") Signed-off-by: Badhri Jagan Sridharan Acked-by: Heikki Krogeus Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20231101021909.2962679-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 058d5b853b57..b386102f7a3a 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5391,6 +5391,15 @@ static void _tcpm_pd_hard_reset(struct tcpm_port *port) if (port->bist_request == BDO_MODE_TESTDATA && port->tcpc->set_bist_data) port->tcpc->set_bist_data(port->tcpc, false); + switch (port->state) { + case ERROR_RECOVERY: + case PORT_RESET: + case PORT_RESET_WAIT_OFF: + return; + default: + break; + } + if (port->ams != NONE_AMS) port->ams = NONE_AMS; if (port->hard_reset_count < PD_N_HARD_RESET_COUNT) From cdd0cde8d8837de3d234bb08115d5f196e0ac8dd Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 30 Oct 2023 07:56:40 +0100 Subject: [PATCH 142/234] USB: typec: tps6598x: Fix a memory leak in an error handling path All error handling end to the error handling path, except these ones. Go to 'release_fw' as well here, otherwise 'fw' is leaking. Fixes: 7e7a3c815d22 ("USB: typec: tps6598x: Add TPS25750 support") Signed-off-by: Christophe JAILLET Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/23168336f18a9f6cb1a5b47130fc134dc0510d7f.1698648980.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tipd/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index 0e867f531d34..b0184be06c3d 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -968,16 +968,17 @@ static int tps25750_start_patch_burst_mode(struct tps6598x *tps) ret = of_property_match_string(np, "reg-names", "patch-address"); if (ret < 0) { dev_err(tps->dev, "failed to get patch-address %d\n", ret); - return ret; + goto release_fw; } ret = of_property_read_u32_index(np, "reg", ret, &addr); if (ret) - return ret; + goto release_fw; if (addr == 0 || (addr >= 0x20 && addr <= 0x23)) { dev_err(tps->dev, "wrong patch address %u\n", addr); - return -EINVAL; + ret = -EINVAL; + goto release_fw; } bpms_data.addr = (u8)addr; From 10d510abd096d620b9fda2dd3e0047c5efc4ad2b Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 25 Oct 2023 11:51:10 +0200 Subject: [PATCH 143/234] usb: dwc3: Fix default mode initialization The default mode, configurable by DT, shall be set before usb role switch driver is registered. Otherwise there is a race between default mode and mode set by usb role switch driver. Fixes: 98ed256a4dbad ("usb: dwc3: Add support for role-switch-default-mode binding") Cc: stable Signed-off-by: Alexander Stein Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20231025095110.2405281-1-alexander.stein@ew.tq-group.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/drd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/drd.c b/drivers/usb/dwc3/drd.c index 039bf241769a..57ddd2e43022 100644 --- a/drivers/usb/dwc3/drd.c +++ b/drivers/usb/dwc3/drd.c @@ -505,6 +505,7 @@ static int dwc3_setup_role_switch(struct dwc3 *dwc) dwc->role_switch_default_mode = USB_DR_MODE_PERIPHERAL; mode = DWC3_GCTL_PRTCAP_DEVICE; } + dwc3_set_mode(dwc, mode); dwc3_role_switch.fwnode = dev_fwnode(dwc->dev); dwc3_role_switch.set = dwc3_usb_role_switch_set; @@ -526,7 +527,6 @@ static int dwc3_setup_role_switch(struct dwc3 *dwc) } } - dwc3_set_mode(dwc, mode); return 0; } #else From 187fb003c57c964ea61ac9fbfe41abf3ca9973eb Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 1 Nov 2023 01:28:45 +0000 Subject: [PATCH 144/234] usb: typec: tcpm: Fix sink caps op current check TCPM checks for sink caps operational current even when PD is disabled. This incorrectly sets tcpm_set_charge() when PD is disabled. Check for sink caps only when PD is enabled. [ 97.572342] Start toggling [ 97.578949] CC1: 0 -> 0, CC2: 0 -> 0 [state TOGGLING, polarity 0, disconnected] [ 99.571648] CC1: 0 -> 0, CC2: 0 -> 4 [state TOGGLING, polarity 0, connected] [ 99.571658] state change TOGGLING -> SNK_ATTACH_WAIT [rev3 NONE_AMS] [ 99.571673] pending state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED @ 170 ms [rev3 NONE_AMS] [ 99.741778] state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED [delayed 170 ms] [ 99.789283] CC1: 0 -> 0, CC2: 4 -> 5 [state SNK_DEBOUNCED, polarity 0, connected] [ 99.789306] state change SNK_DEBOUNCED -> SNK_DEBOUNCED [rev3 NONE_AMS] [ 99.903584] VBUS on [ 99.903591] state change SNK_DEBOUNCED -> SNK_ATTACHED [rev3 NONE_AMS] [ 99.903600] polarity 1 [ 99.910155] enable vbus discharge ret:0 [ 99.910160] Requesting mux state 1, usb-role 2, orientation 2 [ 99.946791] state change SNK_ATTACHED -> SNK_STARTUP [rev3 NONE_AMS] [ 99.946798] state change SNK_STARTUP -> SNK_DISCOVERY [rev3 NONE_AMS] [ 99.946800] Setting voltage/current limit 5000 mV 500 mA [ 99.946803] vbus=0 charge:=1 [ 100.027139] state change SNK_DISCOVERY -> SNK_READY [rev3 NONE_AMS] [ 100.027145] Setting voltage/current limit 5000 mV 3000 mA [ 100.466830] VBUS on Cc: stable@vger.kernel.org Fixes: 803b1c8a0cea ("usb: typec: tcpm: not sink vbus if operational current is 0mA") Signed-off-by: Badhri Jagan Sridharan Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Tested-by: Will McVicker Link: https://lore.kernel.org/r/20231101012845.2701348-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index b386102f7a3a..bfb6f9481e87 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -4273,7 +4273,8 @@ static void run_state_machine(struct tcpm_port *port) current_lim = PD_P_SNK_STDBY_MW / 5; tcpm_set_current_limit(port, current_lim, 5000); /* Not sink vbus if operational current is 0mA */ - tcpm_set_charge(port, !!pdo_max_current(port->snk_pdo[0])); + tcpm_set_charge(port, !port->pd_supported || + pdo_max_current(port->snk_pdo[0])); if (!port->pd_supported) tcpm_set_state(port, SNK_READY, 0); From 58f2fcb3a845fcbbad2f3196bb37d744e0506250 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Wed, 8 Nov 2023 10:31:25 +0100 Subject: [PATCH 145/234] usb: cdnsp: Fix deadlock issue during using NCM gadget The interrupt service routine registered for the gadget is a primary handler which mask the interrupt source and a threaded handler which handles the source of the interrupt. Since the threaded handler is voluntary threaded, the IRQ-core does not disable bottom halves before invoke the handler like it does for the forced-threaded handler. Due to changes in networking it became visible that a network gadget's completions handler may schedule a softirq which remains unprocessed. The gadget's completion handler is usually invoked either in hard-IRQ or soft-IRQ context. In this context it is enough to just raise the softirq because the softirq itself will be handled once that context is left. In the case of the voluntary threaded handler, there is nothing that will process pending softirqs. Which means it remain queued until another random interrupt (on this CPU) fires and handles it on its exit path or another thread locks and unlocks a lock with the bh suffix. Worst case is that the CPU goes idle and the NOHZ complains about unhandled softirqs. Disable bottom halves before acquiring the lock (and disabling interrupts) and enable them after dropping the lock. This ensures that any pending softirqs will handled right away. cc: stable@vger.kernel.org Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Acked-by: Peter Chen Link: https://lore.kernel.org/r/20231108093125.224963-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-ring.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index af981778382d..02f297f5637d 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -1529,6 +1529,7 @@ irqreturn_t cdnsp_thread_irq_handler(int irq, void *data) unsigned long flags; int counter = 0; + local_bh_disable(); spin_lock_irqsave(&pdev->lock, flags); if (pdev->cdnsp_state & (CDNSP_STATE_HALTED | CDNSP_STATE_DYING)) { @@ -1541,6 +1542,7 @@ irqreturn_t cdnsp_thread_irq_handler(int irq, void *data) cdnsp_died(pdev); spin_unlock_irqrestore(&pdev->lock, flags); + local_bh_enable(); return IRQ_HANDLED; } @@ -1557,6 +1559,7 @@ irqreturn_t cdnsp_thread_irq_handler(int irq, void *data) cdnsp_update_erst_dequeue(pdev, event_ring_deq, 1); spin_unlock_irqrestore(&pdev->lock, flags); + local_bh_enable(); return IRQ_HANDLED; } From 30ce1c03a083c9dc131d09d28ba1bcaafa3d8df2 Mon Sep 17 00:00:00 2001 From: Wentong Wu Date: Tue, 14 Nov 2023 15:25:31 +0800 Subject: [PATCH 146/234] usb: misc: ljca: Drop _ADR support to get ljca children devices Currently the shipped platforms use only _HID to distinguish ljca children devices. The _ADR support here is for future HW. This patch is to drop _ADR support and we can then re-introduce it (revert this patch) if future HW actually starts using _ADR to distinguish children devices. Signed-off-by: Wentong Wu Reviewed-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20231114072531.1366753-1-wentong.wu@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/usb-ljca.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/usb/misc/usb-ljca.c b/drivers/usb/misc/usb-ljca.c index c9decd0396d4..7f0deebebc13 100644 --- a/drivers/usb/misc/usb-ljca.c +++ b/drivers/usb/misc/usb-ljca.c @@ -457,8 +457,8 @@ static void ljca_auxdev_acpi_bind(struct ljca_adapter *adap, u64 adr, u8 id) { struct ljca_match_ids_walk_data wd = { 0 }; - struct acpi_device *parent, *adev; struct device *dev = adap->dev; + struct acpi_device *parent; char uid[4]; parent = ACPI_COMPANION(dev); @@ -466,17 +466,7 @@ static void ljca_auxdev_acpi_bind(struct ljca_adapter *adap, return; /* - * get auxdev ACPI handle from the ACPI device directly - * under the parent that matches _ADR. - */ - adev = acpi_find_child_device(parent, adr, false); - if (adev) { - ACPI_COMPANION_SET(&auxdev->dev, adev); - return; - } - - /* - * _ADR is a grey area in the ACPI specification, some + * Currently LJCA hw doesn't use _ADR instead the shipped * platforms use _HID to distinguish children devices. */ switch (adr) { From 0583bc776ca5b5a3f5752869fc31cf7322df2b35 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 15 Nov 2023 15:45:07 +0100 Subject: [PATCH 147/234] USB: dwc2: write HCINT with INTMASK applied dwc2_hc_n_intr() writes back INTMASK as read but evaluates it with intmask applied. In stress testing this causes spurious interrupts like this: [Mon Aug 14 10:51:07 2023] dwc2 3f980000.usb: dwc2_hc_chhltd_intr_dma: Channel 7 - ChHltd set, but reason is unknown [Mon Aug 14 10:51:07 2023] dwc2 3f980000.usb: hcint 0x00000002, intsts 0x04600001 [Mon Aug 14 10:51:08 2023] dwc2 3f980000.usb: dwc2_hc_chhltd_intr_dma: Channel 0 - ChHltd set, but reason is unknown [Mon Aug 14 10:51:08 2023] dwc2 3f980000.usb: hcint 0x00000002, intsts 0x04600001 [Mon Aug 14 10:51:08 2023] dwc2 3f980000.usb: dwc2_hc_chhltd_intr_dma: Channel 4 - ChHltd set, but reason is unknown [Mon Aug 14 10:51:08 2023] dwc2 3f980000.usb: hcint 0x00000002, intsts 0x04600001 [Mon Aug 14 10:51:08 2023] dwc2 3f980000.usb: dwc2_update_urb_state_abn(): trimming xfer length Applying INTMASK prevents this. The issue exists in all versions of the driver. Signed-off-by: Oliver Neukum Tested-by: Ivan Ivanov Tested-by: Andrea della Porta Link: https://lore.kernel.org/r/20231115144514.15248-1-oneukum@suse.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/hcd_intr.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/usb/dwc2/hcd_intr.c b/drivers/usb/dwc2/hcd_intr.c index 0144ca8350c3..5c7538d498dd 100644 --- a/drivers/usb/dwc2/hcd_intr.c +++ b/drivers/usb/dwc2/hcd_intr.c @@ -2015,15 +2015,17 @@ static void dwc2_hc_n_intr(struct dwc2_hsotg *hsotg, int chnum) { struct dwc2_qtd *qtd; struct dwc2_host_chan *chan; - u32 hcint, hcintmsk; + u32 hcint, hcintraw, hcintmsk; chan = hsotg->hc_ptr_array[chnum]; - hcint = dwc2_readl(hsotg, HCINT(chnum)); + hcintraw = dwc2_readl(hsotg, HCINT(chnum)); hcintmsk = dwc2_readl(hsotg, HCINTMSK(chnum)); + hcint = hcintraw & hcintmsk; + dwc2_writel(hsotg, hcint, HCINT(chnum)); + if (!chan) { dev_err(hsotg->dev, "## hc_ptr_array for channel is NULL ##\n"); - dwc2_writel(hsotg, hcint, HCINT(chnum)); return; } @@ -2032,11 +2034,9 @@ static void dwc2_hc_n_intr(struct dwc2_hsotg *hsotg, int chnum) chnum); dev_vdbg(hsotg->dev, " hcint 0x%08x, hcintmsk 0x%08x, hcint&hcintmsk 0x%08x\n", - hcint, hcintmsk, hcint & hcintmsk); + hcintraw, hcintmsk, hcint); } - dwc2_writel(hsotg, hcint, HCINT(chnum)); - /* * If we got an interrupt after someone called * dwc2_hcd_endpoint_disable() we don't want to crash below @@ -2046,8 +2046,7 @@ static void dwc2_hc_n_intr(struct dwc2_hsotg *hsotg, int chnum) return; } - chan->hcint = hcint; - hcint &= hcintmsk; + chan->hcint = hcintraw; /* * If the channel was halted due to a dequeue, the qtd list might From 791cd7afe51b0c770264836ab0607766e3e80f52 Mon Sep 17 00:00:00 2001 From: Stanley Chang Date: Fri, 17 Nov 2023 15:03:05 +0800 Subject: [PATCH 148/234] usb: dwc3: add missing of_node_put and platform_device_put of_get_compatible_child performs an of_node_get, so an of_node_put is required. Add platform_device_put to match with of_find_device_by_node. Fixes: 34c200483569 ("usb: dwc3: add Realtek DHC RTD SoC dwc3 glue layer driver") Signed-off-by: Stanley Chang Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20231117070311.32502-1-stanley_chang@realtek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-rtk.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-rtk.c b/drivers/usb/dwc3/dwc3-rtk.c index 590028e8fdcb..3cd6b184551c 100644 --- a/drivers/usb/dwc3/dwc3-rtk.c +++ b/drivers/usb/dwc3/dwc3-rtk.c @@ -183,10 +183,13 @@ static enum usb_device_speed __get_dwc3_maximum_speed(struct device_node *np) ret = of_property_read_string(dwc3_np, "maximum-speed", &maximum_speed); if (ret < 0) - return USB_SPEED_UNKNOWN; + goto out; ret = match_string(speed_names, ARRAY_SIZE(speed_names), maximum_speed); +out: + of_node_put(dwc3_np); + return (ret < 0) ? USB_SPEED_UNKNOWN : ret; } @@ -339,6 +342,9 @@ static int dwc3_rtk_probe_dwc3_core(struct dwc3_rtk *rtk) switch_usb2_role(rtk, rtk->cur_role); + platform_device_put(dwc3_pdev); + of_node_put(dwc3_node); + return 0; err_pdev_put: From 974bba5c118f4c2baf00de0356e3e4f7928b4cbc Mon Sep 17 00:00:00 2001 From: Niklas Neronin Date: Wed, 15 Nov 2023 14:13:25 +0200 Subject: [PATCH 149/234] usb: config: fix iteration issue in 'usb_get_bos_descriptor()' The BOS descriptor defines a root descriptor and is the base descriptor for accessing a family of related descriptors. Function 'usb_get_bos_descriptor()' encounters an iteration issue when skipping the 'USB_DT_DEVICE_CAPABILITY' descriptor type. This results in the same descriptor being read repeatedly. To address this issue, a 'goto' statement is introduced to ensure that the pointer and the amount read is updated correctly. This ensures that the function iterates to the next descriptor instead of reading the same descriptor repeatedly. Cc: stable@vger.kernel.org Fixes: 3dd550a2d365 ("USB: usbcore: Fix slab-out-of-bounds bug during device reset") Signed-off-by: Niklas Neronin Acked-by: Mathias Nyman Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20231115121325.471454-1-niklas.neronin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index b19e38d5fd10..7f8d33f92ddb 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -1047,7 +1047,7 @@ int usb_get_bos_descriptor(struct usb_device *dev) if (cap->bDescriptorType != USB_DT_DEVICE_CAPABILITY) { dev_notice(ddev, "descriptor type invalid, skip\n"); - continue; + goto skip_to_next_descriptor; } switch (cap_type) { @@ -1078,6 +1078,7 @@ int usb_get_bos_descriptor(struct usb_device *dev) break; } +skip_to_next_descriptor: total_len -= length; buffer += length; } From 8bbae288a85abed6a1cf7d185d8b9dc2f5dcb12c Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Fri, 27 Oct 2023 11:28:20 +0000 Subject: [PATCH 150/234] usb: dwc3: set the dma max_seg_size Allow devices to have dma operations beyond 4K, and avoid warnings such as: DMA-API: dwc3 a600000.usb: mapping sg segment longer than device claims to support [len=86016] [max=65536] Cc: stable@vger.kernel.org Fixes: 72246da40f37 ("usb: Introduce DesignWare USB3 DRD Driver") Reported-by: Zubin Mithra Signed-off-by: Ricardo Ribalda Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20231026-dwc3-v2-1-1d4fd5c3e067@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 0328c86ef806..b101dbf8c5dc 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -2034,6 +2034,8 @@ static int dwc3_probe(struct platform_device *pdev) pm_runtime_put(dev); + dma_set_max_seg_size(dev, UINT_MAX); + return 0; err_exit_debugfs: From 61d2cf0db741827724d33079b4a54bf99a32b8e5 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Sat, 18 Nov 2023 11:30:11 +0800 Subject: [PATCH 151/234] usb: xhci-mtk: fix in-ep's start-split check failure It's wrong to use the data length in a CS (in uframe x) to check whether there is a SS (in uframe x-2), because for a isoc-in ep, it may need some CS to receive data; Save the count of SS in a uframe for isoc/intr in-eps to fix the issue. Fixes: 5c954e030f55 ("usb: xhci-mtk: improve split scheduling by separate IN/OUT budget") Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/20231118033011.22033-1-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk-sch.c | 13 ++++++++++--- drivers/usb/host/xhci-mtk.h | 2 ++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c index 5b3cd455adec..61f3f8bbdcea 100644 --- a/drivers/usb/host/xhci-mtk-sch.c +++ b/drivers/usb/host/xhci-mtk-sch.c @@ -650,9 +650,8 @@ static int check_isoc_ss_overlap(struct mu3h_sch_ep_info *sch_ep, u32 offset) if (sch_ep->ep_type == ISOC_OUT_EP) { for (j = 0; j < sch_ep->num_budget_microframes; j++) { - k = XHCI_MTK_BW_INDEX(base + j + CS_OFFSET); - /* use cs to indicate existence of in-ss @(base+j) */ - if (tt->fs_bus_bw_in[k]) + k = XHCI_MTK_BW_INDEX(base + j); + if (tt->in_ss_cnt[k]) return -ESCH_SS_OVERLAP; } } else if (sch_ep->ep_type == ISOC_IN_EP || sch_ep->ep_type == INT_IN_EP) { @@ -769,6 +768,14 @@ static void update_sch_tt(struct mu3h_sch_ep_info *sch_ep, bool used) tt->fs_frame_bw[f] -= (u16)sch_ep->bw_budget_table[j]; } } + + if (sch_ep->ep_type == ISOC_IN_EP || sch_ep->ep_type == INT_IN_EP) { + k = XHCI_MTK_BW_INDEX(base); + if (used) + tt->in_ss_cnt[k]++; + else + tt->in_ss_cnt[k]--; + } } if (used) diff --git a/drivers/usb/host/xhci-mtk.h b/drivers/usb/host/xhci-mtk.h index 865b55e23b15..39f7ae7d3087 100644 --- a/drivers/usb/host/xhci-mtk.h +++ b/drivers/usb/host/xhci-mtk.h @@ -38,6 +38,7 @@ * @fs_bus_bw_in: save bandwidth used by FS/LS IN eps in each uframes * @ls_bus_bw: save bandwidth used by LS eps in each uframes * @fs_frame_bw: save bandwidth used by FS/LS eps in each FS frames + * @in_ss_cnt: the count of Start-Split for IN eps * @ep_list: Endpoints using this TT */ struct mu3h_sch_tt { @@ -45,6 +46,7 @@ struct mu3h_sch_tt { u16 fs_bus_bw_in[XHCI_MTK_MAX_ESIT]; u8 ls_bus_bw[XHCI_MTK_MAX_ESIT]; u16 fs_frame_bw[XHCI_MTK_FRAMES_CNT]; + u8 in_ss_cnt[XHCI_MTK_MAX_ESIT]; struct list_head ep_list; }; From 4b435764f7c2922822962e7f6343cce645d502f1 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 21 Nov 2023 13:46:47 +0200 Subject: [PATCH 152/234] usb: typec: tipd: Supply also I2C driver data If there is no fwnode, device_get_match_data() does not return anything making the probe to always fail. Using i2c_get_match_data() when there is no fwnode to fix that. Fixes: 5bd4853da049 ("USB: typec: tps6598x: Add device data to of_device_id") Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20231121114647.2005011-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tipd/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index b0184be06c3d..196535ad996d 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -1227,7 +1227,10 @@ static int tps6598x_probe(struct i2c_client *client) TPS_REG_INT_PLUG_EVENT; } - tps->data = device_get_match_data(tps->dev); + if (dev_fwnode(tps->dev)) + tps->data = device_get_match_data(tps->dev); + else + tps->data = i2c_get_match_data(client); if (!tps->data) return -EINVAL; @@ -1426,7 +1429,7 @@ static const struct of_device_id tps6598x_of_match[] = { MODULE_DEVICE_TABLE(of, tps6598x_of_match); static const struct i2c_device_id tps6598x_id[] = { - { "tps6598x" }, + { "tps6598x", (kernel_ulong_t)&tps6598x_data }, { } }; MODULE_DEVICE_TABLE(i2c, tps6598x_id); From 16b7e0cccb243033de4406ffb4d892365041a1e7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 3 Nov 2023 17:43:23 +0100 Subject: [PATCH 153/234] USB: xhci-plat: fix legacy PHY double init Commits 7b8ef22ea547 ("usb: xhci: plat: Add USB phy support") and 9134c1fd0503 ("usb: xhci: plat: Add USB 3.0 phy support") added support for looking up legacy PHYs from the sysdev devicetree node and initialising them. This broke drivers such as dwc3 which manages PHYs themself as the PHYs would now be initialised twice, something which specifically can lead to resources being left enabled during suspend (e.g. with the usb_phy_generic PHY driver). As the dwc3 driver uses driver-name matching for the xhci platform device, fix this by only looking up and initialising PHYs for devices that have been matched using OF. Note that checking that the platform device has a devicetree node would currently be sufficient, but that could lead to subtle breakages in case anyone ever tries to reuse an ancestor's node. Fixes: 7b8ef22ea547 ("usb: xhci: plat: Add USB phy support") Fixes: 9134c1fd0503 ("usb: xhci: plat: Add USB 3.0 phy support") Cc: stable@vger.kernel.org # 4.1 Cc: Maxime Ripard Cc: Stanley Chang Signed-off-by: Johan Hovold Tested-by: Stefan Eichenberger Tested-by: Stanley Chang Link: https://lore.kernel.org/r/20231103164323.14294-1-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 50 +++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index b93161374293..732cdeb73920 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -148,7 +149,7 @@ int xhci_plat_probe(struct platform_device *pdev, struct device *sysdev, const s int ret; int irq; struct xhci_plat_priv *priv = NULL; - + bool of_match; if (usb_disabled()) return -ENODEV; @@ -253,16 +254,23 @@ int xhci_plat_probe(struct platform_device *pdev, struct device *sysdev, const s &xhci->imod_interval); } - hcd->usb_phy = devm_usb_get_phy_by_phandle(sysdev, "usb-phy", 0); - if (IS_ERR(hcd->usb_phy)) { - ret = PTR_ERR(hcd->usb_phy); - if (ret == -EPROBE_DEFER) - goto disable_clk; - hcd->usb_phy = NULL; - } else { - ret = usb_phy_init(hcd->usb_phy); - if (ret) - goto disable_clk; + /* + * Drivers such as dwc3 manages PHYs themself (and rely on driver name + * matching for the xhci platform device). + */ + of_match = of_match_device(pdev->dev.driver->of_match_table, &pdev->dev); + if (of_match) { + hcd->usb_phy = devm_usb_get_phy_by_phandle(sysdev, "usb-phy", 0); + if (IS_ERR(hcd->usb_phy)) { + ret = PTR_ERR(hcd->usb_phy); + if (ret == -EPROBE_DEFER) + goto disable_clk; + hcd->usb_phy = NULL; + } else { + ret = usb_phy_init(hcd->usb_phy); + if (ret) + goto disable_clk; + } } hcd->tpl_support = of_usb_host_tpl_support(sysdev->of_node); @@ -285,15 +293,17 @@ int xhci_plat_probe(struct platform_device *pdev, struct device *sysdev, const s goto dealloc_usb2_hcd; } - xhci->shared_hcd->usb_phy = devm_usb_get_phy_by_phandle(sysdev, - "usb-phy", 1); - if (IS_ERR(xhci->shared_hcd->usb_phy)) { - xhci->shared_hcd->usb_phy = NULL; - } else { - ret = usb_phy_init(xhci->shared_hcd->usb_phy); - if (ret) - dev_err(sysdev, "%s init usb3phy fail (ret=%d)\n", - __func__, ret); + if (of_match) { + xhci->shared_hcd->usb_phy = devm_usb_get_phy_by_phandle(sysdev, + "usb-phy", 1); + if (IS_ERR(xhci->shared_hcd->usb_phy)) { + xhci->shared_hcd->usb_phy = NULL; + } else { + ret = usb_phy_init(xhci->shared_hcd->usb_phy); + if (ret) + dev_err(sysdev, "%s init usb3phy fail (ret=%d)\n", + __func__, ret); + } } xhci->shared_hcd->tpl_support = hcd->tpl_support; From 98c598afc22d4e43c2ad91860b65996d0c099a5d Mon Sep 17 00:00:00 2001 From: Li Nan Date: Mon, 11 Sep 2023 10:33:08 +0800 Subject: [PATCH 154/234] nbd: pass nbd_sock to nbd_read_reply() instead of index If a socket is processing ioctl 'NBD_SET_SOCK', config->socks might be krealloc in nbd_add_socket(), and a garbage request is received now, a UAF may occurs. T1 nbd_ioctl __nbd_ioctl nbd_add_socket blk_mq_freeze_queue T2 recv_work nbd_read_reply sock_xmit krealloc config->socks def config->socks Pass nbd_sock to nbd_read_reply(). And introduce a new function sock_xmit_recv(), which differs from sock_xmit only in the way it get socket. ================================================================== BUG: KASAN: use-after-free in sock_xmit+0x525/0x550 Read of size 8 at addr ffff8880188ec428 by task kworker/u12:1/18779 Workqueue: knbd4-recv recv_work Call Trace: __dump_stack dump_stack+0xbe/0xfd print_address_description.constprop.0+0x19/0x170 __kasan_report.cold+0x6c/0x84 kasan_report+0x3a/0x50 sock_xmit+0x525/0x550 nbd_read_reply+0xfe/0x2c0 recv_work+0x1c2/0x750 process_one_work+0x6b6/0xf10 worker_thread+0xdd/0xd80 kthread+0x30a/0x410 ret_from_fork+0x22/0x30 Allocated by task 18784: kasan_save_stack+0x1b/0x40 kasan_set_track set_alloc_info __kasan_kmalloc __kasan_kmalloc.constprop.0+0xf0/0x130 slab_post_alloc_hook slab_alloc_node slab_alloc __kmalloc_track_caller+0x157/0x550 __do_krealloc krealloc+0x37/0xb0 nbd_add_socket +0x2d3/0x880 __nbd_ioctl nbd_ioctl+0x584/0x8e0 __blkdev_driver_ioctl blkdev_ioctl+0x2a0/0x6e0 block_ioctl+0xee/0x130 vfs_ioctl __do_sys_ioctl __se_sys_ioctl+0x138/0x190 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x61/0xc6 Freed by task 18784: kasan_save_stack+0x1b/0x40 kasan_set_track+0x1c/0x30 kasan_set_free_info+0x20/0x40 __kasan_slab_free.part.0+0x13f/0x1b0 slab_free_hook slab_free_freelist_hook slab_free kfree+0xcb/0x6c0 krealloc+0x56/0xb0 nbd_add_socket+0x2d3/0x880 __nbd_ioctl nbd_ioctl+0x584/0x8e0 __blkdev_driver_ioctl blkdev_ioctl+0x2a0/0x6e0 block_ioctl+0xee/0x130 vfs_ioctl __do_sys_ioctl __se_sys_ioctl+0x138/0x190 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x61/0xc6 Signed-off-by: Li Nan Reviewed-by: Yu Kuai Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20230911023308.3467802-1-linan666@huaweicloud.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 3f03cb3dc33c..b6414e1e645b 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -67,6 +67,7 @@ struct nbd_sock { struct recv_thread_args { struct work_struct work; struct nbd_device *nbd; + struct nbd_sock *nsock; int index; }; @@ -505,15 +506,9 @@ done: return BLK_EH_DONE; } -/* - * Send or receive packet. Return a positive value on success and - * negtive value on failue, and never return 0. - */ -static int sock_xmit(struct nbd_device *nbd, int index, int send, - struct iov_iter *iter, int msg_flags, int *sent) +static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send, + struct iov_iter *iter, int msg_flags, int *sent) { - struct nbd_config *config = nbd->config; - struct socket *sock = config->socks[index]->sock; int result; struct msghdr msg; unsigned int noreclaim_flag; @@ -556,6 +551,19 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, return result; } +/* + * Send or receive packet. Return a positive value on success and + * negtive value on failure, and never return 0. + */ +static int sock_xmit(struct nbd_device *nbd, int index, int send, + struct iov_iter *iter, int msg_flags, int *sent) +{ + struct nbd_config *config = nbd->config; + struct socket *sock = config->socks[index]->sock; + + return __sock_xmit(nbd, sock, send, iter, msg_flags, sent); +} + /* * Different settings for sk->sk_sndtimeo can result in different return values * if there is a signal pending when we enter sendmsg, because reasons? @@ -712,7 +720,7 @@ out: return 0; } -static int nbd_read_reply(struct nbd_device *nbd, int index, +static int nbd_read_reply(struct nbd_device *nbd, struct socket *sock, struct nbd_reply *reply) { struct kvec iov = {.iov_base = reply, .iov_len = sizeof(*reply)}; @@ -721,7 +729,7 @@ static int nbd_read_reply(struct nbd_device *nbd, int index, reply->magic = 0; iov_iter_kvec(&to, ITER_DEST, &iov, 1, sizeof(*reply)); - result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); + result = __sock_xmit(nbd, sock, 0, &to, MSG_WAITALL, NULL); if (result < 0) { if (!nbd_disconnected(nbd->config)) dev_err(disk_to_dev(nbd->disk), @@ -845,14 +853,14 @@ static void recv_work(struct work_struct *work) struct nbd_device *nbd = args->nbd; struct nbd_config *config = nbd->config; struct request_queue *q = nbd->disk->queue; - struct nbd_sock *nsock; + struct nbd_sock *nsock = args->nsock; struct nbd_cmd *cmd; struct request *rq; while (1) { struct nbd_reply reply; - if (nbd_read_reply(nbd, args->index, &reply)) + if (nbd_read_reply(nbd, nsock->sock, &reply)) break; /* @@ -887,7 +895,6 @@ static void recv_work(struct work_struct *work) percpu_ref_put(&q->q_usage_counter); } - nsock = config->socks[args->index]; mutex_lock(&nsock->tx_lock); nbd_mark_nsock_dead(nbd, nsock, 1); mutex_unlock(&nsock->tx_lock); @@ -1231,6 +1238,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg) INIT_WORK(&args->work, recv_work); args->index = i; args->nbd = nbd; + args->nsock = nsock; nsock->cookie++; mutex_unlock(&nsock->tx_lock); sockfd_put(old); @@ -1413,6 +1421,7 @@ static int nbd_start_device(struct nbd_device *nbd) refcount_inc(&nbd->config_refs); INIT_WORK(&args->work, recv_work); args->nbd = nbd; + args->nsock = config->socks[i]; args->index = i; queue_work(nbd->recv_workq, &args->work); } From 2e569ada424c40ce27c99bfab4f9780619061c83 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 15 Nov 2023 22:02:11 +0100 Subject: [PATCH 155/234] x86/microcode: Remove the driver announcement and version First of all, the print is useless. The driver will either load and say which microcode revision the machine has or issue an error. Then, the version number is meaningless and actively confusing, as Yazen mentioned recently: when a subset of patches are backported to a distro kernel, one can't assume the driver version is the same as the upstream one. And besides, the version number of the loader hasn't been used and incremented for a long time. So drop it. Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20231115210212.9981-2-bp@alien8.de --- arch/x86/kernel/cpu/microcode/core.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 666d25bbc5ad..b4be3a2c79df 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -41,8 +41,6 @@ #include "internal.h" -#define DRIVER_VERSION "2.2" - static struct microcode_ops *microcode_ops; bool dis_ucode_ldr = true; @@ -846,8 +844,6 @@ static int __init microcode_init(void) cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/microcode:online", mc_cpu_online, mc_cpu_down_prep); - pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION); - return 0; out_pdev: From 080990aa3344123673f686cda2df0d1b0deee046 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 15 Nov 2023 22:02:12 +0100 Subject: [PATCH 156/234] x86/microcode: Rework early revisions reporting The AMD side of the loader issues the microcode revision for each logical thread on the system, which can become really noisy on huge machines. And doing that doesn't make a whole lot of sense - the microcode revision is already in /proc/cpuinfo. So in case one is interested in the theoretical support of mixed silicon steppings on AMD, one can check there. What is also missing on the AMD side - something which people have requested before - is showing the microcode revision the CPU had *before* the early update. So abstract that up in the main code and have the BSP on each vendor provide those revision numbers. Then, dump them only once on driver init. On Intel, do not dump the patch date - it is not needed. Reported-by: Linus Torvalds Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/CAHk-=wg=%2B8rceshMkB4VnKxmRccVLtBLPBawnewZuuqyx5U=3A@mail.gmail.com --- arch/x86/kernel/cpu/microcode/amd.c | 39 +++++++----------------- arch/x86/kernel/cpu/microcode/core.c | 11 +++++-- arch/x86/kernel/cpu/microcode/intel.c | 17 +++++------ arch/x86/kernel/cpu/microcode/internal.h | 14 ++++++--- 4 files changed, 37 insertions(+), 44 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 9373ec01c5ae..13b45b9c806d 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -104,8 +104,6 @@ struct cont_desc { size_t size; }; -static u32 ucode_new_rev; - /* * Microcode patch container file is prepended to the initrd in cpio * format. See Documentation/arch/x86/microcode.rst @@ -442,12 +440,11 @@ static int __apply_microcode_amd(struct microcode_amd *mc) * * Returns true if container found (sets @desc), false otherwise. */ -static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size) +static bool early_apply_microcode(u32 cpuid_1_eax, u32 old_rev, void *ucode, size_t size) { struct cont_desc desc = { 0 }; struct microcode_amd *mc; bool ret = false; - u32 rev, dummy; desc.cpuid_1_eax = cpuid_1_eax; @@ -457,22 +454,15 @@ static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size) if (!mc) return ret; - native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - /* * Allow application of the same revision to pick up SMT-specific * changes even if the revision of the other SMT thread is already * up-to-date. */ - if (rev > mc->hdr.patch_id) + if (old_rev > mc->hdr.patch_id) return ret; - if (!__apply_microcode_amd(mc)) { - ucode_new_rev = mc->hdr.patch_id; - ret = true; - } - - return ret; + return !__apply_microcode_amd(mc); } static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family) @@ -506,9 +496,12 @@ static void __init find_blobs_in_containers(unsigned int cpuid_1_eax, struct cpi *ret = cp; } -void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax) +void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_eax) { struct cpio_data cp = { }; + u32 dummy; + + native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->old_rev, dummy); /* Needed in load_microcode_amd() */ ucode_cpu_info[0].cpu_sig.sig = cpuid_1_eax; @@ -517,7 +510,8 @@ void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax) if (!(cp.data && cp.size)) return; - early_apply_microcode(cpuid_1_eax, cp.data, cp.size); + if (early_apply_microcode(cpuid_1_eax, ed->old_rev, cp.data, cp.size)) + native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->new_rev, dummy); } static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size); @@ -625,10 +619,8 @@ void reload_ucode_amd(unsigned int cpu) rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); if (rev < mc->hdr.patch_id) { - if (!__apply_microcode_amd(mc)) { - ucode_new_rev = mc->hdr.patch_id; - pr_info("reload patch_level=0x%08x\n", ucode_new_rev); - } + if (!__apply_microcode_amd(mc)) + pr_info_once("reload revision: 0x%08x\n", mc->hdr.patch_id); } } @@ -649,8 +641,6 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) if (p && (p->patch_id == csig->rev)) uci->mc = p->data; - pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); - return 0; } @@ -691,8 +681,6 @@ static enum ucode_state apply_microcode_amd(int cpu) rev = mc_amd->hdr.patch_id; ret = UCODE_UPDATED; - pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev); - out: uci->cpu_sig.rev = rev; c->microcode = rev; @@ -935,11 +923,6 @@ struct microcode_ops * __init init_amd_microcode(void) pr_warn("AMD CPU family 0x%x not supported\n", c->x86); return NULL; } - - if (ucode_new_rev) - pr_info_once("microcode updated early to new patch_level=0x%08x\n", - ucode_new_rev); - return µcode_amd_ops; } diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index b4be3a2c79df..232026a239a6 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -75,6 +75,8 @@ static u32 final_levels[] = { 0, /* T-101 terminator */ }; +struct early_load_data early_data; + /* * Check the current patch level on this CPU. * @@ -153,9 +155,9 @@ void __init load_ucode_bsp(void) return; if (intel) - load_ucode_intel_bsp(); + load_ucode_intel_bsp(&early_data); else - load_ucode_amd_bsp(cpuid_1_eax); + load_ucode_amd_bsp(&early_data, cpuid_1_eax); } void load_ucode_ap(void) @@ -826,6 +828,11 @@ static int __init microcode_init(void) if (!microcode_ops) return -ENODEV; + pr_info_once("Current revision: 0x%08x\n", (early_data.new_rev ?: early_data.old_rev)); + + if (early_data.new_rev) + pr_info_once("Updated early from: 0x%08x\n", early_data.old_rev); + microcode_pdev = platform_device_register_simple("microcode", -1, NULL, 0); if (IS_ERR(microcode_pdev)) return PTR_ERR(microcode_pdev); diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 6024feb98d29..070426b9895f 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -339,16 +339,9 @@ static enum ucode_state __apply_microcode(struct ucode_cpu_info *uci, static enum ucode_state apply_microcode_early(struct ucode_cpu_info *uci) { struct microcode_intel *mc = uci->mc; - enum ucode_state ret; - u32 cur_rev, date; + u32 cur_rev; - ret = __apply_microcode(uci, mc, &cur_rev); - if (ret == UCODE_UPDATED) { - date = mc->hdr.date; - pr_info_once("updated early: 0x%x -> 0x%x, date = %04x-%02x-%02x\n", - cur_rev, mc->hdr.rev, date & 0xffff, date >> 24, (date >> 16) & 0xff); - } - return ret; + return __apply_microcode(uci, mc, &cur_rev); } static __init bool load_builtin_intel_microcode(struct cpio_data *cp) @@ -413,13 +406,17 @@ static int __init save_builtin_microcode(void) early_initcall(save_builtin_microcode); /* Load microcode on BSP from initrd or builtin blobs */ -void __init load_ucode_intel_bsp(void) +void __init load_ucode_intel_bsp(struct early_load_data *ed) { struct ucode_cpu_info uci; + ed->old_rev = intel_get_microcode_revision(); + uci.mc = get_microcode_blob(&uci, false); if (uci.mc && apply_microcode_early(&uci) == UCODE_UPDATED) ucode_patch_va = UCODE_BSP_LOADED; + + ed->new_rev = uci.cpu_sig.rev; } void load_ucode_intel_ap(void) diff --git a/arch/x86/kernel/cpu/microcode/internal.h b/arch/x86/kernel/cpu/microcode/internal.h index f8047b12329a..21776c529fa9 100644 --- a/arch/x86/kernel/cpu/microcode/internal.h +++ b/arch/x86/kernel/cpu/microcode/internal.h @@ -37,6 +37,12 @@ struct microcode_ops { use_nmi : 1; }; +struct early_load_data { + u32 old_rev; + u32 new_rev; +}; + +extern struct early_load_data early_data; extern struct ucode_cpu_info ucode_cpu_info[]; struct cpio_data find_microcode_in_initrd(const char *path); @@ -92,14 +98,14 @@ extern bool dis_ucode_ldr; extern bool force_minrev; #ifdef CONFIG_CPU_SUP_AMD -void load_ucode_amd_bsp(unsigned int family); +void load_ucode_amd_bsp(struct early_load_data *ed, unsigned int family); void load_ucode_amd_ap(unsigned int family); int save_microcode_in_initrd_amd(unsigned int family); void reload_ucode_amd(unsigned int cpu); struct microcode_ops *init_amd_microcode(void); void exit_amd_microcode(void); #else /* CONFIG_CPU_SUP_AMD */ -static inline void load_ucode_amd_bsp(unsigned int family) { } +static inline void load_ucode_amd_bsp(struct early_load_data *ed, unsigned int family) { } static inline void load_ucode_amd_ap(unsigned int family) { } static inline int save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; } static inline void reload_ucode_amd(unsigned int cpu) { } @@ -108,12 +114,12 @@ static inline void exit_amd_microcode(void) { } #endif /* !CONFIG_CPU_SUP_AMD */ #ifdef CONFIG_CPU_SUP_INTEL -void load_ucode_intel_bsp(void); +void load_ucode_intel_bsp(struct early_load_data *ed); void load_ucode_intel_ap(void); void reload_ucode_intel(void); struct microcode_ops *init_intel_microcode(void); #else /* CONFIG_CPU_SUP_INTEL */ -static inline void load_ucode_intel_bsp(void) { } +static inline void load_ucode_intel_bsp(struct early_load_data *ed) { } static inline void load_ucode_intel_ap(void) { } static inline void reload_ucode_intel(void) { } static inline struct microcode_ops *init_intel_microcode(void) { return NULL; } From cea7008190ad65b4aaae6e94667a358d2c10a696 Mon Sep 17 00:00:00 2001 From: Cong Yang Date: Mon, 20 Nov 2023 10:01:09 +0800 Subject: [PATCH 157/234] drm/panel: boe-tv101wum-nl6: Fine tune Himax83102-j02 panel HFP and HBP The refresh reported by modetest is 60.46Hz, and the actual measurement is 60.01Hz, which is outside the expected tolerance. Adjust hporch and pixel clock to fix it. After repair, modetest and actual measurement were all 60.01Hz. Modetest refresh = Pixel CLK/ htotal* vtotal, but measurement frame rate is HS->LP cycle time(Vblanking). Measured frame rate is not only affecte by Htotal/Vtotal/pixel clock, also affected by Lane-num/PixelBit/LineTime /DSI CLK. Assume that the DSI controller could not make the mode that we requested(presumably it's PLL couldn't generate the exact pixel clock?). If you use a different DSI controller, you may need to readjust these parameters. Now this panel looks like it's only used by me on the MTK platform, so let's change this set of parameters. Fixes: 1bc2ef065f13 ("drm/panel: Support for Starry-himax83102-j02 TDDI MIPI-DSI panel") Signed-off-by: Cong Yang Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20231120020109.3216343-1-yangcong5@huaqin.corp-partner.google.com --- drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c index a287be1aaf70..be8f48e3c1db 100644 --- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c +++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c @@ -1767,11 +1767,11 @@ static const struct panel_desc starry_qfh032011_53g_desc = { }; static const struct drm_display_mode starry_himax83102_j02_default_mode = { - .clock = 161600, + .clock = 162850, .hdisplay = 1200, - .hsync_start = 1200 + 40, - .hsync_end = 1200 + 40 + 20, - .htotal = 1200 + 40 + 20 + 40, + .hsync_start = 1200 + 50, + .hsync_end = 1200 + 50 + 20, + .htotal = 1200 + 50 + 20 + 50, .vdisplay = 1920, .vsync_start = 1920 + 116, .vsync_end = 1920 + 116 + 8, From ab93edb2f94c3c0d5965be3815782472adbe3f52 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 22 Nov 2023 06:11:09 +1000 Subject: [PATCH 158/234] nouveau/gsp: allocate enough space for all channel ids. This probably isn't the ideal fix, but we ended up using chids sparsely, and lots of things rely on indexing into the full range, so just allocate the full range up front. The GSP code fixes 8 channels into a userd page, but we end up using a single userd page per channel so end up sparsely using the range. Fixes a few crashes seen with multiple channels. Link: https://gitlab.freedesktop.org/drm/nouveau/-/issues/277 Signed-off-by: Dave Airlie Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231121201109.2988516-1-airlied@gmail.com --- drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c index 3adbb05ff587..d088e636edc3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c @@ -539,7 +539,7 @@ r535_fifo_runl_ctor(struct nvkm_fifo *fifo) struct nvkm_runl *runl; struct nvkm_engn *engn; u32 cgids = 2048; - u32 chids = 2048 / CHID_PER_USERD; + u32 chids = 2048; int ret; NV2080_CTRL_FIFO_GET_DEVICE_INFO_TABLE_PARAMS *ctrl; From 0739af07d1d947af27c877f797cb82ceee702515 Mon Sep 17 00:00:00 2001 From: Jose Ignacio Tornos Martinez Date: Mon, 20 Nov 2023 13:06:29 +0100 Subject: [PATCH 159/234] net: usb: ax88179_178a: fix failed operations during ax88179_reset Using generic ASIX Electronics Corp. AX88179 Gigabit Ethernet device, the following test cycle has been implemented: - power on - check logs - shutdown - after detecting the system shutdown, disconnect power - after approximately 60 seconds of sleep, power is restored Running some cycles, sometimes error logs like this appear: kernel: ax88179_178a 2-9:1.0 (unnamed net_device) (uninitialized): Failed to write reg index 0x0001: -19 kernel: ax88179_178a 2-9:1.0 (unnamed net_device) (uninitialized): Failed to read reg index 0x0001: -19 ... These failed operation are happening during ax88179_reset execution, so the initialization could not be correct. In order to avoid this, we need to increase the delay after reset and clock initial operations. By using these larger values, many cycles have been run and no failed operations appear. It would be better to check some status register to verify when the operation has finished, but I do not have found any available information (neither in the public datasheets nor in the manufacturer's driver). The only available information for the necessary delays is the maufacturer's driver (original values) but the proposed values are not enough for the tested devices. Fixes: e2ca90c276e1f ("ax88179_178a: ASIX AX88179_178A USB 3.0/2.0 to gigabit ethernet adapter driver") Reported-by: Herb Wei Tested-by: Herb Wei Signed-off-by: Jose Ignacio Tornos Martinez Link: https://lore.kernel.org/r/20231120120642.54334-1-jtornosm@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/ax88179_178a.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index aff39bf3161d..4ea0e155bb0d 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1583,11 +1583,11 @@ static int ax88179_reset(struct usbnet *dev) *tmp16 = AX_PHYPWR_RSTCTL_IPRL; ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, 2, 2, tmp16); - msleep(200); + msleep(500); *tmp = AX_CLK_SELECT_ACS | AX_CLK_SELECT_BCS; ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, tmp); - msleep(100); + msleep(200); /* Ethernet PHY Auto Detach*/ ax88179_auto_detach(dev); From 495ec91b48e489afefb2ad714f0d9b68c3016c6c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Nov 2023 12:01:09 -0800 Subject: [PATCH 160/234] docs: netdev: try to guide people on dealing with silence There has been more than a few threads which went idle before the merge window and now people came back to them and started asking about next steps. We currently tell people to be patient and not to repost too often. Our "not too often", however, is still a few orders of magnitude faster than other subsystems. Or so I feel after hearing people talk about review rates at LPC. Clarify in the doc that if the discussion went idle for a week on netdev, 95% of the time there's no point waiting longer. Link: https://lore.kernel.org/r/20231120200109.620392-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/process/maintainer-netdev.rst | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst index 7feacc20835e..84ee60fceef2 100644 --- a/Documentation/process/maintainer-netdev.rst +++ b/Documentation/process/maintainer-netdev.rst @@ -193,9 +193,23 @@ Review timelines Generally speaking, the patches get triaged quickly (in less than 48h). But be patient, if your patch is active in patchwork (i.e. it's listed on the project's patch list) the chances it was missed are close to zero. -Asking the maintainer for status updates on your -patch is a good way to ensure your patch is ignored or pushed to the -bottom of the priority list. + +The high volume of development on netdev makes reviewers move on +from discussions relatively quickly. New comments and replies +are very unlikely to arrive after a week of silence. If a patch +is no longer active in patchwork and the thread went idle for more +than a week - clarify the next steps and/or post the next version. + +For RFC postings specifically, if nobody responded in a week - reviewers +either missed the posting or have no strong opinions. If the code is ready, +repost as a PATCH. + +Emails saying just "ping" or "bump" are considered rude. If you can't figure +out the status of the patch from patchwork or where the discussion has +landed - describe your best guess and ask if it's correct. For example:: + + I don't understand what the next steps are. Person X seems to be unhappy + with A, should I do B and repost the patches? .. _Changes requested: From b6fe6f03716da246b453369f98a553d4ab21447c Mon Sep 17 00:00:00 2001 From: Hao Ge Date: Tue, 21 Nov 2023 09:37:09 +0800 Subject: [PATCH 161/234] dpll: Fix potential msg memleak when genlmsg_put_reply failed We should clean the skb resource if genlmsg_put_reply failed. Fixes: 9d71b54b65b1 ("dpll: netlink: Add DPLL framework base functions") Signed-off-by: Hao Ge Reviewed-by: Vadim Fedorenko Link: https://lore.kernel.org/r/20231121013709.73323-1-gehao@kylinos.cn Signed-off-by: Jakub Kicinski --- drivers/dpll/dpll_netlink.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index a6dc3997bf5c..442a0ebeb953 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -1093,9 +1093,10 @@ int dpll_nl_pin_id_get_doit(struct sk_buff *skb, struct genl_info *info) return -ENOMEM; hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0, DPLL_CMD_PIN_ID_GET); - if (!hdr) + if (!hdr) { + nlmsg_free(msg); return -EMSGSIZE; - + } pin = dpll_pin_find_from_nlattr(info); if (!IS_ERR(pin)) { ret = dpll_msg_add_pin_handle(msg, pin); @@ -1123,8 +1124,10 @@ int dpll_nl_pin_get_doit(struct sk_buff *skb, struct genl_info *info) return -ENOMEM; hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0, DPLL_CMD_PIN_GET); - if (!hdr) + if (!hdr) { + nlmsg_free(msg); return -EMSGSIZE; + } ret = dpll_cmd_pin_get_one(msg, pin, info->extack); if (ret) { nlmsg_free(msg); @@ -1256,8 +1259,10 @@ int dpll_nl_device_id_get_doit(struct sk_buff *skb, struct genl_info *info) return -ENOMEM; hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0, DPLL_CMD_DEVICE_ID_GET); - if (!hdr) + if (!hdr) { + nlmsg_free(msg); return -EMSGSIZE; + } dpll = dpll_device_find_from_nlattr(info); if (!IS_ERR(dpll)) { @@ -1284,8 +1289,10 @@ int dpll_nl_device_get_doit(struct sk_buff *skb, struct genl_info *info) return -ENOMEM; hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0, DPLL_CMD_DEVICE_GET); - if (!hdr) + if (!hdr) { + nlmsg_free(msg); return -EMSGSIZE; + } ret = dpll_device_get_one(dpll, msg, info->extack); if (ret) { From 18286883e779fb79b413a7462968ee3f6768f19c Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 14 Nov 2023 17:59:28 +0100 Subject: [PATCH 162/234] x86/hyperv: Use atomic_try_cmpxchg() to micro-optimize hv_nmi_unknown() Use atomic_try_cmpxchg() instead of atomic_cmpxchg(*ptr, old, new) == old in hv_nmi_unknown(). On x86 the CMPXCHG instruction returns success in the ZF flag, so this change saves a compare after CMPXCHG. The generated asm code improves from: 3e: 65 8b 15 00 00 00 00 mov %gs:0x0(%rip),%edx 45: b8 ff ff ff ff mov $0xffffffff,%eax 4a: f0 0f b1 15 00 00 00 lock cmpxchg %edx,0x0(%rip) 51: 00 52: 83 f8 ff cmp $0xffffffff,%eax 55: 0f 95 c0 setne %al to: 3e: 65 8b 15 00 00 00 00 mov %gs:0x0(%rip),%edx 45: b8 ff ff ff ff mov $0xffffffff,%eax 4a: f0 0f b1 15 00 00 00 lock cmpxchg %edx,0x0(%rip) 51: 00 52: 0f 95 c0 setne %al No functional change intended. Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Wei Liu Cc: Dexuan Cui Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. Peter Anvin" Signed-off-by: Uros Bizjak Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20231114170038.381634-1-ubizjak@gmail.com Signed-off-by: Wei Liu Message-ID: <20231114170038.381634-1-ubizjak@gmail.com> --- arch/x86/kernel/cpu/mshyperv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index e6bba12c759c..01fa06dd06b6 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -262,11 +262,14 @@ static uint32_t __init ms_hyperv_platform(void) static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs) { static atomic_t nmi_cpu = ATOMIC_INIT(-1); + unsigned int old_cpu, this_cpu; if (!unknown_nmi_panic) return NMI_DONE; - if (atomic_cmpxchg(&nmi_cpu, -1, raw_smp_processor_id()) != -1) + old_cpu = -1; + this_cpu = raw_smp_processor_id(); + if (!atomic_try_cmpxchg(&nmi_cpu, &old_cpu, this_cpu)) return NMI_HANDLED; return NMI_DONE; From e389fe8b68137344562fb6e4d53d8a89ef6212dd Mon Sep 17 00:00:00 2001 From: Victor Fragoso Date: Tue, 21 Nov 2023 21:05:56 +0000 Subject: [PATCH 163/234] USB: serial: option: add Fibocom L7xx modules Add support for Fibocom L716-EU module series. L716-EU is a Fibocom module based on ZTE's V3E/V3T chipset. Device creates multiple interfaces when connected to PC as follows: - Network Interface: ECM or RNDIS (set by FW or AT Command) - ttyUSB0: AT port - ttyUSB1: Modem port - ttyUSB2: AT2 port - ttyUSB3: Trace port for log information - ADB: ADB port for debugging. ("Driver=usbfs" when ADB server enabled) Here are the outputs of lsusb and usb-devices: $ ls /dev/ttyUSB* /dev/ttyUSB0 /dev/ttyUSB1 /dev/ttyUSB2 /dev/ttyUSB3 usb-devices: L716-EU (ECM mode): T: Bus=03 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 51 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2cb7 ProdID=0001 Rev= 1.00 S: Manufacturer=Fibocom,Incorporated S: Product=Fibocom Mobile Boardband S: SerialNumber=1234567890ABCDEF C:* #Ifs= 7 Cfg#= 1 Atr=e0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=06 Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=06 Prot=00 Driver=cdc_ether E: Ad=87(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms L716-EU (RNDIS mode): T: Bus=03 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 49 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2cb7 ProdID=0001 Rev= 1.00 S: Manufacturer=Fibocom,Incorporated S: Product=Fibocom Mobile Boardband S: SerialNumber=1234567890ABCDEF C:* #Ifs= 7 Cfg#= 1 Atr=e0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=e0(wlcon) Sub=01 Prot=03 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=02 Prot=ff Driver=rndis_host E: Ad=87(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Victor Fragoso Reviewed-by: Lars Melin Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 9c76095ebfe1..06b9b04c022a 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2250,6 +2250,7 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) | RSVD(5) | RSVD(6) }, { USB_DEVICE(0x1782, 0x4d10) }, /* Fibocom L610 (AT mode) */ { USB_DEVICE_INTERFACE_CLASS(0x1782, 0x4d11, 0xff) }, /* Fibocom L610 (ECM/RNDIS mode) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x0001, 0xff, 0xff, 0xff) }, /* Fibocom L716-EU (ECM/RNDIS mode) */ { USB_DEVICE(0x2cb7, 0x0104), /* Fibocom NL678 series */ .driver_info = RSVD(4) | RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff), /* Fibocom NL678 series */ From 06ae5afce8cc1f7621cc5c7751e449ce20d68af7 Mon Sep 17 00:00:00 2001 From: Denis Benato Date: Fri, 17 Nov 2023 14:15:55 +1300 Subject: [PATCH 164/234] HID: hid-asus: add const to read-only outgoing usb buffer In the function asus_kbd_set_report the parameter buf is read-only as it gets copied in a memory portion suitable for USB transfer, but the parameter is not marked as const: add the missing const and mark const immutable buffers passed to that function. Signed-off-by: Denis Benato Signed-off-by: Luke D. Jones Signed-off-by: Jiri Kosina --- drivers/hid/hid-asus.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index fd61dba88233..b70673a929a1 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -381,7 +381,7 @@ static int asus_raw_event(struct hid_device *hdev, return 0; } -static int asus_kbd_set_report(struct hid_device *hdev, u8 *buf, size_t buf_size) +static int asus_kbd_set_report(struct hid_device *hdev, const u8 *buf, size_t buf_size) { unsigned char *dmabuf; int ret; @@ -404,7 +404,7 @@ static int asus_kbd_set_report(struct hid_device *hdev, u8 *buf, size_t buf_size static int asus_kbd_init(struct hid_device *hdev) { - u8 buf[] = { FEATURE_KBD_REPORT_ID, 0x41, 0x53, 0x55, 0x53, 0x20, 0x54, + const u8 buf[] = { FEATURE_KBD_REPORT_ID, 0x41, 0x53, 0x55, 0x53, 0x20, 0x54, 0x65, 0x63, 0x68, 0x2e, 0x49, 0x6e, 0x63, 0x2e, 0x00 }; int ret; @@ -418,7 +418,7 @@ static int asus_kbd_init(struct hid_device *hdev) static int asus_kbd_get_functions(struct hid_device *hdev, unsigned char *kbd_func) { - u8 buf[] = { FEATURE_KBD_REPORT_ID, 0x05, 0x20, 0x31, 0x00, 0x08 }; + const u8 buf[] = { FEATURE_KBD_REPORT_ID, 0x05, 0x20, 0x31, 0x00, 0x08 }; u8 *readbuf; int ret; @@ -449,7 +449,7 @@ static int asus_kbd_get_functions(struct hid_device *hdev, static int rog_nkey_led_init(struct hid_device *hdev) { - u8 buf_init_start[] = { FEATURE_KBD_LED_REPORT_ID1, 0xB9 }; + const u8 buf_init_start[] = { FEATURE_KBD_LED_REPORT_ID1, 0xB9 }; u8 buf_init2[] = { FEATURE_KBD_LED_REPORT_ID1, 0x41, 0x53, 0x55, 0x53, 0x20, 0x54, 0x65, 0x63, 0x68, 0x2e, 0x49, 0x6e, 0x63, 0x2e, 0x00 }; u8 buf_init3[] = { FEATURE_KBD_LED_REPORT_ID1, From 546edbd26cff7ae990e480a59150e801a06f77b1 Mon Sep 17 00:00:00 2001 From: Denis Benato Date: Fri, 17 Nov 2023 14:15:56 +1300 Subject: [PATCH 165/234] HID: hid-asus: reset the backlight brightness level on resume Some devices managed by this driver automatically set brightness to 0 before entering a suspended state and reset it back to a default brightness level after the resume: this has the effect of having the kernel report wrong brightness status after a sleep, and on some devices (like the Asus RC71L) that brightness is the intensity of LEDs directly facing the user. Fix the above issue by setting back brightness to the level it had before entering a sleep state. Signed-off-by: Denis Benato Signed-off-by: Luke D. Jones Signed-off-by: Jiri Kosina --- drivers/hid/hid-asus.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index b70673a929a1..78cdfb8b9a7a 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -1000,6 +1000,24 @@ static int asus_start_multitouch(struct hid_device *hdev) return 0; } +static int __maybe_unused asus_resume(struct hid_device *hdev) { + struct asus_drvdata *drvdata = hid_get_drvdata(hdev); + int ret = 0; + + if (drvdata->kbd_backlight) { + const u8 buf[] = { FEATURE_KBD_REPORT_ID, 0xba, 0xc5, 0xc4, + drvdata->kbd_backlight->cdev.brightness }; + ret = asus_kbd_set_report(hdev, buf, sizeof(buf)); + if (ret < 0) { + hid_err(hdev, "Asus failed to set keyboard backlight: %d\n", ret); + goto asus_resume_err; + } + } + +asus_resume_err: + return ret; +} + static int __maybe_unused asus_reset_resume(struct hid_device *hdev) { struct asus_drvdata *drvdata = hid_get_drvdata(hdev); @@ -1294,6 +1312,7 @@ static struct hid_driver asus_driver = { .input_configured = asus_input_configured, #ifdef CONFIG_PM .reset_resume = asus_reset_resume, + .resume = asus_resume, #endif .event = asus_event, .raw_event = asus_raw_event From 9ffccb691adb854e7b7f3ee57fbbda12ff70533f Mon Sep 17 00:00:00 2001 From: Aoba K Date: Tue, 21 Nov 2023 20:23:11 +0800 Subject: [PATCH 166/234] HID: multitouch: Add quirk for HONOR GLO-GXXX touchpad Honor MagicBook 13 2023 has a touchpad which do not switch to the multitouch mode until the input mode feature is written by the host. The touchpad do report the input mode at touchpad(3), while itself working under mouse mode. As a workaround, it is possible to call MT_QUIRE_FORCE_GET_FEATURE to force set feature in mt_set_input_mode for such device. The touchpad reports as BLTP7853, which cannot retrive any useful manufacture information on the internel by this string at present. As the serial number of the laptop is GLO-G52, while DMI info reports the laptop serial number as GLO-GXXX, this workaround should applied to all models which has the GLO-GXXX. Signed-off-by: Aoba K Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index e098cc7b3944..fd5b0637dad6 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -2046,6 +2046,11 @@ static const struct hid_device_id mt_devices[] = { MT_USB_DEVICE(USB_VENDOR_ID_HANVON_ALT, USB_DEVICE_ID_HANVON_ALT_MULTITOUCH) }, + /* HONOR GLO-GXXX panel */ + { .driver_data = MT_CLS_VTL, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + 0x347d, 0x7853) }, + /* Ilitek dual touch panel */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_ILITEK, From 84d2db91f14a32dc856a5972e3f0907089093c7a Mon Sep 17 00:00:00 2001 From: Nguyen Dinh Phi Date: Tue, 21 Nov 2023 15:53:57 +0800 Subject: [PATCH 167/234] nfc: virtual_ncidev: Add variable to check if ndev is running syzbot reported an memory leak that happens when an skb is add to send_buff after virtual nci closed. This patch adds a variable to track if the ndev is running before handling new skb in send function. Signed-off-by: Nguyen Dinh Phi Reported-by: syzbot+6eb09d75211863f15e3e@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/00000000000075472b06007df4fb@google.com Reviewed-by: Bongsu Jeon Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- drivers/nfc/virtual_ncidev.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/nfc/virtual_ncidev.c b/drivers/nfc/virtual_ncidev.c index b027be0b0b6f..590b038e449e 100644 --- a/drivers/nfc/virtual_ncidev.c +++ b/drivers/nfc/virtual_ncidev.c @@ -26,10 +26,14 @@ struct virtual_nci_dev { struct mutex mtx; struct sk_buff *send_buff; struct wait_queue_head wq; + bool running; }; static int virtual_nci_open(struct nci_dev *ndev) { + struct virtual_nci_dev *vdev = nci_get_drvdata(ndev); + + vdev->running = true; return 0; } @@ -40,6 +44,7 @@ static int virtual_nci_close(struct nci_dev *ndev) mutex_lock(&vdev->mtx); kfree_skb(vdev->send_buff); vdev->send_buff = NULL; + vdev->running = false; mutex_unlock(&vdev->mtx); return 0; @@ -50,7 +55,7 @@ static int virtual_nci_send(struct nci_dev *ndev, struct sk_buff *skb) struct virtual_nci_dev *vdev = nci_get_drvdata(ndev); mutex_lock(&vdev->mtx); - if (vdev->send_buff) { + if (vdev->send_buff || !vdev->running) { mutex_unlock(&vdev->mtx); kfree_skb(skb); return -1; From e6d71b437abc2f249e3b6a1ae1a7228e09c6e563 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Wed, 22 Nov 2023 10:37:05 +0800 Subject: [PATCH 168/234] net/smc: avoid data corruption caused by decline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We found a data corruption issue during testing of SMC-R on Redis applications. The benchmark has a low probability of reporting a strange error as shown below. "Error: Protocol error, got "\xe2" as reply type byte" Finally, we found that the retrieved error data was as follows: 0xE2 0xD4 0xC3 0xD9 0x04 0x00 0x2C 0x20 0xA6 0x56 0x00 0x16 0x3E 0x0C 0xCB 0x04 0x02 0x01 0x00 0x00 0x20 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xE2 It is quite obvious that this is a SMC DECLINE message, which means that the applications received SMC protocol message. We found that this was caused by the following situations: client server ¦ clc proposal -------------> ¦ clc accept <------------- ¦ clc confirm -------------> wait llc confirm send llc confirm ¦failed llc confirm ¦ x------ (after 2s)timeout wait llc confirm rsp wait decline (after 1s) timeout (after 2s) timeout ¦ decline --------------> ¦ decline <-------------- As a result, a decline message was sent in the implementation, and this message was read from TCP by the already-fallback connection. This patch double the client timeout as 2x of the server value, With this simple change, the Decline messages should never cross or collide (during Confirm link timeout). This issue requires an immediate solution, since the protocol updates involve a more long-term solution. Fixes: 0fb0b02bd6fd ("net/smc: adapt SMC client code to use the LLC flow") Signed-off-by: D. Wythe Reviewed-by: Wen Gu Reviewed-by: Wenjia Zhang Signed-off-by: David S. Miller --- net/smc/af_smc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index da97f946b79b..2a1388841951 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -598,8 +598,12 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc) struct smc_llc_qentry *qentry; int rc; - /* receive CONFIRM LINK request from server over RoCE fabric */ - qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME, + /* Receive CONFIRM LINK request from server over RoCE fabric. + * Increasing the client's timeout by twice as much as the server's + * timeout by default can temporarily avoid decline messages of + * both sides crossing or colliding + */ + qentry = smc_llc_wait(link->lgr, NULL, 2 * SMC_LLC_WAIT_TIME, SMC_LLC_CONFIRM_LINK); if (!qentry) { struct smc_clc_msg_decline dclc; From 372ee6a3368ec6ff46ee4e6ff4ffe2fe1e059dbb Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 21 Nov 2023 21:32:05 +0100 Subject: [PATCH 169/234] usb: misc: ljca: Fix enumeration error on Dell Latitude 9420 Not all LJCA chips implement SPI and on chips without SPI reading the SPI descriptors will timeout. On laptop models like the Dell Latitude 9420, this is expected behavior and not an error. Modify the driver to continue without instantiating a SPI auxbus child, instead of failing to probe() the whole LJCA chip. Fixes: acd6199f195d ("usb: Add support for Intel LJCA device") Signed-off-by: Hans de Goede Reviewed-by: Wentong Wu Link: https://lore.kernel.org/r/20231104175104.38786-1-hdegoede@redhat.com Link: https://lore.kernel.org/r/20231121203205.223047-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/usb-ljca.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/misc/usb-ljca.c b/drivers/usb/misc/usb-ljca.c index 7f0deebebc13..35770e608c64 100644 --- a/drivers/usb/misc/usb-ljca.c +++ b/drivers/usb/misc/usb-ljca.c @@ -646,10 +646,11 @@ static int ljca_enumerate_spi(struct ljca_adapter *adap) unsigned int i; int ret; + /* Not all LJCA chips implement SPI, a timeout reading the descriptors is normal */ ret = ljca_send(adap, LJCA_CLIENT_MNG, LJCA_MNG_ENUM_SPI, NULL, 0, buf, sizeof(buf), true, LJCA_ENUM_CLIENT_TIMEOUT_MS); if (ret < 0) - return ret; + return (ret == -ETIMEDOUT) ? 0 : ret; /* check firmware response */ desc = (struct ljca_spi_descriptor *)buf; From 6a26310273c323380da21eb23fcfd50e31140913 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 21 Nov 2023 09:09:33 +0100 Subject: [PATCH 170/234] Revert "net: r8169: Disable multicast filter for RTL8168H and RTL8107E" This reverts commit efa5f1311c4998e9e6317c52bc5ee93b3a0f36df. I couldn't reproduce the reported issue. What I did, based on a pcap packet log provided by the reporter: - Used same chip version (RTL8168h) - Set MAC address to the one used on the reporters system - Replayed the EAPOL unicast packet that, according to the reporter, was filtered out by the mc filter. The packet was properly received. Therefore the root cause of the reported issue seems to be somewhere else. Disabling mc filtering completely for the most common chip version is a quite big hammer. Therefore revert the change and wait for further analysis results from the reporter. Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index b9bb1d2f0237..295366a85c63 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2599,9 +2599,7 @@ static void rtl_set_rx_mode(struct net_device *dev) rx_mode &= ~AcceptMulticast; } else if (netdev_mc_count(dev) > MC_FILTER_LIMIT || dev->flags & IFF_ALLMULTI || - tp->mac_version == RTL_GIGA_MAC_VER_35 || - tp->mac_version == RTL_GIGA_MAC_VER_46 || - tp->mac_version == RTL_GIGA_MAC_VER_48) { + tp->mac_version == RTL_GIGA_MAC_VER_35) { /* accept all multicasts */ } else if (netdev_mc_empty(dev)) { rx_mode &= ~AcceptMulticast; From d0c930b745cafde8e7d25d0356c648bca669556a Mon Sep 17 00:00:00 2001 From: Stefan Eichenberger Date: Mon, 13 Nov 2023 15:59:20 +0100 Subject: [PATCH 171/234] dt-bindings: usb: microchip,usb5744: Add second supply The USB5744 has two power supplies one for 3V3 and one for 1V2. Add the second supply to the USB5744 DT binding. Signed-off-by: Stefan Eichenberger Signed-off-by: Francesco Dolcini Acked-by: Conor Dooley Cc: stable Link: https://lore.kernel.org/r/20231113145921.30104-2-francesco@dolcini.it Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/usb/microchip,usb5744.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml b/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml index ff3a1707ef57..6d4cfd943f58 100644 --- a/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml +++ b/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml @@ -36,7 +36,11 @@ properties: vdd-supply: description: - VDD power supply to the hub + 3V3 power supply to the hub + + vdd2-supply: + description: + 1V2 power supply to the hub peer-hub: $ref: /schemas/types.yaml#/definitions/phandle @@ -62,6 +66,7 @@ allOf: properties: reset-gpios: false vdd-supply: false + vdd2-supply: false peer-hub: false i2c-bus: false else: From 6972b38ca05235f6142715db7062ecc87a422e22 Mon Sep 17 00:00:00 2001 From: Stefan Eichenberger Date: Mon, 13 Nov 2023 15:59:21 +0100 Subject: [PATCH 172/234] usb: misc: onboard-hub: add support for Microchip USB5744 Add support for the Microchip USB5744 USB3.0 and USB2.0 Hub. The Microchip USB5744 supports two power supplies, one for 1V2 and one for 3V3. According to the datasheet there is no need for a delay between power on and reset, so this value is set to 0. Signed-off-by: Stefan Eichenberger Signed-off-by: Francesco Dolcini Cc: stable Acked-by: Matthias Kaehlcke Link: https://lore.kernel.org/r/20231113145921.30104-3-francesco@dolcini.it Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/onboard_usb_hub.c | 2 ++ drivers/usb/misc/onboard_usb_hub.h | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/usb/misc/onboard_usb_hub.c b/drivers/usb/misc/onboard_usb_hub.c index a341b2fbb7b4..2b45404e9732 100644 --- a/drivers/usb/misc/onboard_usb_hub.c +++ b/drivers/usb/misc/onboard_usb_hub.c @@ -432,6 +432,8 @@ static const struct usb_device_id onboard_hub_id_table[] = { { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2412) }, /* USB2412 USB 2.0 */ { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2514) }, /* USB2514B USB 2.0 */ { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2517) }, /* USB2517 USB 2.0 */ + { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2744) }, /* USB5744 USB 2.0 */ + { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x5744) }, /* USB5744 USB 3.0 */ { USB_DEVICE(VENDOR_ID_REALTEK, 0x0411) }, /* RTS5411 USB 3.1 */ { USB_DEVICE(VENDOR_ID_REALTEK, 0x5411) }, /* RTS5411 USB 2.1 */ { USB_DEVICE(VENDOR_ID_REALTEK, 0x0414) }, /* RTS5414 USB 3.2 */ diff --git a/drivers/usb/misc/onboard_usb_hub.h b/drivers/usb/misc/onboard_usb_hub.h index c4e24a7b9290..292110e64a1d 100644 --- a/drivers/usb/misc/onboard_usb_hub.h +++ b/drivers/usb/misc/onboard_usb_hub.h @@ -16,6 +16,11 @@ static const struct onboard_hub_pdata microchip_usb424_data = { .num_supplies = 1, }; +static const struct onboard_hub_pdata microchip_usb5744_data = { + .reset_us = 0, + .num_supplies = 2, +}; + static const struct onboard_hub_pdata realtek_rts5411_data = { .reset_us = 0, .num_supplies = 1, @@ -50,6 +55,8 @@ static const struct of_device_id onboard_hub_match[] = { { .compatible = "usb424,2412", .data = µchip_usb424_data, }, { .compatible = "usb424,2514", .data = µchip_usb424_data, }, { .compatible = "usb424,2517", .data = µchip_usb424_data, }, + { .compatible = "usb424,2744", .data = µchip_usb5744_data, }, + { .compatible = "usb424,5744", .data = µchip_usb5744_data, }, { .compatible = "usb451,8140", .data = &ti_tusb8041_data, }, { .compatible = "usb451,8142", .data = &ti_tusb8041_data, }, { .compatible = "usb4b4,6504", .data = &cypress_hx3_data, }, From 0c2671f33a9c975d752216739d6a05cb88e98aa4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 20 Nov 2023 17:16:05 +0100 Subject: [PATCH 173/234] dt-bindings: usb: qcom,dwc3: fix example wakeup interrupt types The DP/DM wakeup interrupts are edge triggered and which edge to trigger on depends on use-case and whether a Low speed or Full/High speed device is connected. Fixes: 3828026c9ec8 ("dt-bindings: usb: qcom,dwc3: Convert USB DWC3 bindings") Signed-off-by: Johan Hovold Acked-by: Krzysztof Kozlowski Reviewed-by: Andrew Halaney Link: https://lore.kernel.org/r/20231120161607.7405-2-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/qcom,dwc3.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml b/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml index e889158ca205..915c8205623b 100644 --- a/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml +++ b/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml @@ -521,8 +521,8 @@ examples: interrupts = , , - , - ; + , + ; interrupt-names = "hs_phy_irq", "ss_phy_irq", "dm_hs_phy_irq", "dp_hs_phy_irq"; From 41f5a0973259db9e4e3c9963d36505f80107d1a0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 20 Nov 2023 17:16:06 +0100 Subject: [PATCH 174/234] USB: dwc3: qcom: fix wakeup after probe deferral The Qualcomm glue driver is overriding the interrupt trigger types defined by firmware when requesting the wakeup interrupts during probe. This can lead to a failure to map the DP/DM wakeup interrupts after a probe deferral as the firmware defined trigger types do not match the type used for the initial mapping: irq: type mismatch, failed to map hwirq-14 for interrupt-controller@b220000! irq: type mismatch, failed to map hwirq-15 for interrupt-controller@b220000! Fix this by not overriding the firmware provided trigger types when requesting the wakeup interrupts. Fixes: a4333c3a6ba9 ("usb: dwc3: Add Qualcomm DWC3 glue driver") Cc: stable@vger.kernel.org # 4.18 Signed-off-by: Johan Hovold Reviewed-by: Andrew Halaney Link: https://lore.kernel.org/r/20231120161607.7405-3-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-qcom.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 3de43df6bbe8..cf200342273a 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -549,7 +549,7 @@ static int dwc3_qcom_setup_irq(struct platform_device *pdev) irq_set_status_flags(irq, IRQ_NOAUTOEN); ret = devm_request_threaded_irq(qcom->dev, irq, NULL, qcom_dwc3_resume_irq, - IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + IRQF_ONESHOT, "qcom_dwc3 HS", qcom); if (ret) { dev_err(qcom->dev, "hs_phy_irq failed: %d\n", ret); @@ -564,7 +564,7 @@ static int dwc3_qcom_setup_irq(struct platform_device *pdev) irq_set_status_flags(irq, IRQ_NOAUTOEN); ret = devm_request_threaded_irq(qcom->dev, irq, NULL, qcom_dwc3_resume_irq, - IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + IRQF_ONESHOT, "qcom_dwc3 DP_HS", qcom); if (ret) { dev_err(qcom->dev, "dp_hs_phy_irq failed: %d\n", ret); @@ -579,7 +579,7 @@ static int dwc3_qcom_setup_irq(struct platform_device *pdev) irq_set_status_flags(irq, IRQ_NOAUTOEN); ret = devm_request_threaded_irq(qcom->dev, irq, NULL, qcom_dwc3_resume_irq, - IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + IRQF_ONESHOT, "qcom_dwc3 DM_HS", qcom); if (ret) { dev_err(qcom->dev, "dm_hs_phy_irq failed: %d\n", ret); @@ -594,7 +594,7 @@ static int dwc3_qcom_setup_irq(struct platform_device *pdev) irq_set_status_flags(irq, IRQ_NOAUTOEN); ret = devm_request_threaded_irq(qcom->dev, irq, NULL, qcom_dwc3_resume_irq, - IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + IRQF_ONESHOT, "qcom_dwc3 SS", qcom); if (ret) { dev_err(qcom->dev, "ss_phy_irq failed: %d\n", ret); From aee70a1d711327dae409671035a0368c1dc4a2ea Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 20 Nov 2023 17:16:07 +0100 Subject: [PATCH 175/234] USB: dwc3: qcom: simplify wakeup interrupt setup Use the IRQF_NO_AUTOEN irq flag when requesting the wakeup interrupts instead of setting it separately. No functional change intended. Signed-off-by: Johan Hovold Reviewed-by: Andrew Halaney Link: https://lore.kernel.org/r/20231120161607.7405-4-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-qcom.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index cf200342273a..8a76973f1fa2 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -546,10 +546,9 @@ static int dwc3_qcom_setup_irq(struct platform_device *pdev) pdata ? pdata->hs_phy_irq_index : -1); if (irq > 0) { /* Keep wakeup interrupts disabled until suspend */ - irq_set_status_flags(irq, IRQ_NOAUTOEN); ret = devm_request_threaded_irq(qcom->dev, irq, NULL, qcom_dwc3_resume_irq, - IRQF_ONESHOT, + IRQF_ONESHOT | IRQF_NO_AUTOEN, "qcom_dwc3 HS", qcom); if (ret) { dev_err(qcom->dev, "hs_phy_irq failed: %d\n", ret); @@ -561,10 +560,9 @@ static int dwc3_qcom_setup_irq(struct platform_device *pdev) irq = dwc3_qcom_get_irq(pdev, "dp_hs_phy_irq", pdata ? pdata->dp_hs_phy_irq_index : -1); if (irq > 0) { - irq_set_status_flags(irq, IRQ_NOAUTOEN); ret = devm_request_threaded_irq(qcom->dev, irq, NULL, qcom_dwc3_resume_irq, - IRQF_ONESHOT, + IRQF_ONESHOT | IRQF_NO_AUTOEN, "qcom_dwc3 DP_HS", qcom); if (ret) { dev_err(qcom->dev, "dp_hs_phy_irq failed: %d\n", ret); @@ -576,10 +574,9 @@ static int dwc3_qcom_setup_irq(struct platform_device *pdev) irq = dwc3_qcom_get_irq(pdev, "dm_hs_phy_irq", pdata ? pdata->dm_hs_phy_irq_index : -1); if (irq > 0) { - irq_set_status_flags(irq, IRQ_NOAUTOEN); ret = devm_request_threaded_irq(qcom->dev, irq, NULL, qcom_dwc3_resume_irq, - IRQF_ONESHOT, + IRQF_ONESHOT | IRQF_NO_AUTOEN, "qcom_dwc3 DM_HS", qcom); if (ret) { dev_err(qcom->dev, "dm_hs_phy_irq failed: %d\n", ret); @@ -591,10 +588,9 @@ static int dwc3_qcom_setup_irq(struct platform_device *pdev) irq = dwc3_qcom_get_irq(pdev, "ss_phy_irq", pdata ? pdata->ss_phy_irq_index : -1); if (irq > 0) { - irq_set_status_flags(irq, IRQ_NOAUTOEN); ret = devm_request_threaded_irq(qcom->dev, irq, NULL, qcom_dwc3_resume_irq, - IRQF_ONESHOT, + IRQF_ONESHOT | IRQF_NO_AUTOEN, "qcom_dwc3 SS", qcom); if (ret) { dev_err(qcom->dev, "ss_phy_irq failed: %d\n", ret); From 51392a1879ff06dc21b68aef4825f6ef68a7be42 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 17 Nov 2023 18:36:48 +0100 Subject: [PATCH 176/234] USB: dwc3: qcom: fix resource leaks on probe deferral The driver needs to deregister and free the newly allocated dwc3 core platform device on ACPI probe errors (e.g. probe deferral) and on driver unbind but instead it leaked those resources while erroneously dropping a reference to the parent platform device which is still in use. For OF probing the driver takes a reference to the dwc3 core platform device which has also always been leaked. Fix the broken ACPI tear down and make sure to drop the dwc3 core reference for both OF and ACPI. Fixes: 8fd95da2cfb5 ("usb: dwc3: qcom: Release the correct resources in dwc3_qcom_remove()") Fixes: 2bc02355f8ba ("usb: dwc3: qcom: Add support for booting with ACPI") Fixes: a4333c3a6ba9 ("usb: dwc3: Add Qualcomm DWC3 glue driver") Cc: stable@vger.kernel.org # 4.18 Cc: Christophe JAILLET Cc: Lee Jones Signed-off-by: Johan Hovold Acked-by: Andrew Halaney Link: https://lore.kernel.org/r/20231117173650.21161-2-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-qcom.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 8a76973f1fa2..313a8ac2bd60 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -754,6 +754,7 @@ static int dwc3_qcom_of_register_core(struct platform_device *pdev) if (!qcom->dwc3) { ret = -ENODEV; dev_err(dev, "failed to get dwc3 platform device\n"); + of_platform_depopulate(dev); } node_put: @@ -895,7 +896,7 @@ static int dwc3_qcom_probe(struct platform_device *pdev) if (ret) { dev_err(dev, "failed to register DWC3 Core, err=%d\n", ret); - goto depopulate; + goto clk_disable; } ret = dwc3_qcom_interconnect_init(qcom); @@ -930,7 +931,8 @@ depopulate: if (np) of_platform_depopulate(&pdev->dev); else - platform_device_put(pdev); + platform_device_del(qcom->dwc3); + platform_device_put(qcom->dwc3); clk_disable: for (i = qcom->num_clocks - 1; i >= 0; i--) { clk_disable_unprepare(qcom->clks[i]); @@ -953,7 +955,8 @@ static void dwc3_qcom_remove(struct platform_device *pdev) if (np) of_platform_depopulate(&pdev->dev); else - platform_device_put(pdev); + platform_device_del(qcom->dwc3); + platform_device_put(qcom->dwc3); for (i = qcom->num_clocks - 1; i >= 0; i--) { clk_disable_unprepare(qcom->clks[i]); From 9feefbf57d92e8ee293dad67585d351c7d0b6e37 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 17 Nov 2023 18:36:49 +0100 Subject: [PATCH 177/234] USB: dwc3: qcom: fix software node leak on probe errors Make sure to remove the software node also on (ACPI) probe errors to avoid leaking the underlying resources. Note that the software node is only used for ACPI probe so the driver unbind tear down is updated to match probe. Fixes: 8dc6e6dd1bee ("usb: dwc3: qcom: Constify the software node") Cc: stable@vger.kernel.org # 5.12 Cc: Heikki Krogerus Signed-off-by: Johan Hovold Acked-by: Heikki Krogerus Acked-by: Andrew Halaney Link: https://lore.kernel.org/r/20231117173650.21161-3-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-qcom.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 313a8ac2bd60..a9bce4548019 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -928,10 +928,12 @@ static int dwc3_qcom_probe(struct platform_device *pdev) interconnect_exit: dwc3_qcom_interconnect_exit(qcom); depopulate: - if (np) + if (np) { of_platform_depopulate(&pdev->dev); - else + } else { + device_remove_software_node(&qcom->dwc3->dev); platform_device_del(qcom->dwc3); + } platform_device_put(qcom->dwc3); clk_disable: for (i = qcom->num_clocks - 1; i >= 0; i--) { @@ -951,11 +953,12 @@ static void dwc3_qcom_remove(struct platform_device *pdev) struct device *dev = &pdev->dev; int i; - device_remove_software_node(&qcom->dwc3->dev); - if (np) + if (np) { of_platform_depopulate(&pdev->dev); - else + } else { + device_remove_software_node(&qcom->dwc3->dev); platform_device_del(qcom->dwc3); + } platform_device_put(qcom->dwc3); for (i = qcom->num_clocks - 1; i >= 0; i--) { From 9cf87666fc6e08572341fe08ecd909935998fbbd Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 17 Nov 2023 18:36:50 +0100 Subject: [PATCH 178/234] USB: dwc3: qcom: fix ACPI platform device leak Make sure to free the "urs" platform device, which is created for some ACPI platforms, on probe errors and on driver unbind. Compile-tested only. Fixes: c25c210f590e ("usb: dwc3: qcom: add URS Host support for sdm845 ACPI boot") Cc: Shawn Guo Signed-off-by: Johan Hovold Acked-by: Andrew Halaney Acked-by: Shawn Guo Link: https://lore.kernel.org/r/20231117173650.21161-4-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-qcom.c | 37 +++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index a9bce4548019..fdf6d5d3c2ad 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -763,9 +763,9 @@ node_put: return ret; } -static struct platform_device * -dwc3_qcom_create_urs_usb_platdev(struct device *dev) +static struct platform_device *dwc3_qcom_create_urs_usb_platdev(struct device *dev) { + struct platform_device *urs_usb = NULL; struct fwnode_handle *fwh; struct acpi_device *adev; char name[8]; @@ -785,9 +785,26 @@ dwc3_qcom_create_urs_usb_platdev(struct device *dev) adev = to_acpi_device_node(fwh); if (!adev) - return NULL; + goto err_put_handle; - return acpi_create_platform_device(adev, NULL); + urs_usb = acpi_create_platform_device(adev, NULL); + if (IS_ERR_OR_NULL(urs_usb)) + goto err_put_handle; + + return urs_usb; + +err_put_handle: + fwnode_handle_put(fwh); + + return urs_usb; +} + +static void dwc3_qcom_destroy_urs_usb_platdev(struct platform_device *urs_usb) +{ + struct fwnode_handle *fwh = urs_usb->dev.fwnode; + + platform_device_unregister(urs_usb); + fwnode_handle_put(fwh); } static int dwc3_qcom_probe(struct platform_device *pdev) @@ -871,13 +888,13 @@ static int dwc3_qcom_probe(struct platform_device *pdev) qcom->qscratch_base = devm_ioremap_resource(dev, parent_res); if (IS_ERR(qcom->qscratch_base)) { ret = PTR_ERR(qcom->qscratch_base); - goto clk_disable; + goto free_urs; } ret = dwc3_qcom_setup_irq(pdev); if (ret) { dev_err(dev, "failed to setup IRQs, err=%d\n", ret); - goto clk_disable; + goto free_urs; } /* @@ -896,7 +913,7 @@ static int dwc3_qcom_probe(struct platform_device *pdev) if (ret) { dev_err(dev, "failed to register DWC3 Core, err=%d\n", ret); - goto clk_disable; + goto free_urs; } ret = dwc3_qcom_interconnect_init(qcom); @@ -935,6 +952,9 @@ depopulate: platform_device_del(qcom->dwc3); } platform_device_put(qcom->dwc3); +free_urs: + if (qcom->urs_usb) + dwc3_qcom_destroy_urs_usb_platdev(qcom->urs_usb); clk_disable: for (i = qcom->num_clocks - 1; i >= 0; i--) { clk_disable_unprepare(qcom->clks[i]); @@ -961,6 +981,9 @@ static void dwc3_qcom_remove(struct platform_device *pdev) } platform_device_put(qcom->dwc3); + if (qcom->urs_usb) + dwc3_qcom_destroy_urs_usb_platdev(qcom->urs_usb); + for (i = qcom->num_clocks - 1; i >= 0; i--) { clk_disable_unprepare(qcom->clks[i]); clk_put(qcom->clks[i]); From 4711b7b8f99583f6105a33e91f106125134beacb Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 30 Oct 2023 11:41:33 +0100 Subject: [PATCH 179/234] s390/pai: cleanup event initialization Setting event::hw.last_tag to zero is not necessary. The memory for each event is dynamically allocated by the kernel common code and initialized to zero already. Remove this unnecessary assignment. Move the comment to function paicrypt_start() for clarification. Suggested-by: Sumanth Korikkar Acked-by: Sumanth Korikkar Signed-off-by: Thomas Richter Signed-off-by: Alexander Gordeev --- arch/s390/kernel/perf_pai_crypto.c | 11 +++++------ arch/s390/kernel/perf_pai_ext.c | 1 - 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 77fd24e6cbb6..39a91b00438a 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -279,12 +279,6 @@ static int paicrypt_event_init(struct perf_event *event) if (IS_ERR(cpump)) return PTR_ERR(cpump); - /* Event initialization sets last_tag to 0. When later on the events - * are deleted and re-added, do not reset the event count value to zero. - * Events are added, deleted and re-added when 2 or more events - * are active at the same time. - */ - event->hw.last_tag = 0; event->destroy = paicrypt_event_destroy; if (a->sample_period) { @@ -318,6 +312,11 @@ static void paicrypt_start(struct perf_event *event, int flags) { u64 sum; + /* Event initialization sets last_tag to 0. When later on the events + * are deleted and re-added, do not reset the event count value to zero. + * Events are added, deleted and re-added when 2 or more events + * are active at the same time. + */ if (!event->hw.last_tag) { event->hw.last_tag = 1; sum = paicrypt_getall(event); /* Get current value */ diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index 8ba0f1a3a39d..e7013a2e8960 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -260,7 +260,6 @@ static int paiext_event_init(struct perf_event *event) rc = paiext_alloc(a, event); if (rc) return rc; - event->hw.last_tag = 0; event->destroy = paiext_event_destroy; if (a->sample_period) { From 673752a839694133a328610fcbc54f3d59ae87f3 Mon Sep 17 00:00:00 2001 From: Mikhail Zaslonko Date: Wed, 8 Nov 2023 18:18:52 +0100 Subject: [PATCH 180/234] s390/ipl: add missing IPL_TYPE_ECKD_DUMP case to ipl_init() Add missing IPL_TYPE_ECKD_DUMP case to ipl_init() creating ECKD ipl device attribute group similar to IPL_TYPE_ECKD case. Commit e2d2a2968f2a ("s390/ipl: add eckd dump support") should have had it from the beginning. Fixes: e2d2a2968f2a ("s390/ipl: add eckd dump support") Signed-off-by: Mikhail Zaslonko Reviewed-by: Sven Schnelle Signed-off-by: Alexander Gordeev --- arch/s390/kernel/ipl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index cc364fce6aa9..ba75f6bee774 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -666,6 +666,7 @@ static int __init ipl_init(void) &ipl_ccw_attr_group_lpar); break; case IPL_TYPE_ECKD: + case IPL_TYPE_ECKD_DUMP: rc = sysfs_create_group(&ipl_kset->kobj, &ipl_eckd_attr_group); break; case IPL_TYPE_FCP: From 0a9ace1117bbaa25687468af703b472235f5c210 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 15 Nov 2023 11:39:02 +0100 Subject: [PATCH 181/234] s390: remove odd comment In the meantime hopefully most people got used to forward declarations, therefore remove the explanation. Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/processor.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index dc17896a001a..c15eadbb9983 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -228,7 +228,6 @@ typedef struct thread_struct thread_struct; execve_tail(); \ } while (0) -/* Forward declaration, a strange C thing */ struct task_struct; struct mm_struct; struct seq_file; From aab1f809d7540def24498e81347740a7239a74d5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Nov 2023 13:00:00 +0100 Subject: [PATCH 182/234] scripts/checkstack.pl: match all stack sizes for s390 For some unknown reason the regular expression for checkstack only matches three digit numbers starting with the number "3", or any higher number. Which means that it skips any stack sizes smaller than 304 bytes. This makes the checkstack script a bit less useful than it could be. Change the script to match any number. To be filtered out stack sizes can be configured with the min_stack variable, which omits any stack frame sizes smaller than 100 bytes by default. Tested-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- scripts/checkstack.pl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl index 84f5fb7f1cec..d83ba5d8f3f4 100755 --- a/scripts/checkstack.pl +++ b/scripts/checkstack.pl @@ -97,8 +97,7 @@ my (@stack, $re, $dre, $sub, $x, $xs, $funcre, $min_stack); # 11160: a7 fb ff 60 aghi %r15,-160 # or # 100092: e3 f0 ff c8 ff 71 lay %r15,-56(%r15) - $re = qr/.*(?:lay|ag?hi).*\%r15,-(([0-9]{2}|[3-9])[0-9]{2}) - (?:\(\%r15\))?$/ox; + $re = qr/.*(?:lay|ag?hi).*\%r15,-([0-9]+)(?:\(\%r15\))?$/o; } elsif ($arch eq 'sparc' || $arch eq 'sparc64') { # f0019d10: 9d e3 bf 90 save %sp, -112, %sp $re = qr/.*save.*%sp, -(([0-9]{2}|[3-9])[0-9]{2}), %sp/o; From 3af755a46881c32fecaecfdeaf3a8f0a869deca5 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 21 Nov 2023 09:01:03 +0100 Subject: [PATCH 183/234] nvme: move nvme_stop_keep_alive() back to original position Stopping keep-alive not only stops the keep-alive workqueue, but also needs to be synchronized with I/O termination as we must not send a keep-alive command after all I/O had been terminated. So to avoid any regressions move the call to stop_keep_alive() back to its original position and ensure that keep-alive is correctly stopped failing to setup the admin queue. Fixes: 4733b65d82bd ("nvme: start keep-alive after admin queue setup") Suggested-by: Sagi Grimberg Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/fc.c | 19 ++++++++----------- drivers/nvme/host/rdma.c | 1 + drivers/nvme/host/tcp.c | 1 + 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fd28e6b6574c..46a4c9c5ea96 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -482,7 +482,6 @@ EXPORT_SYMBOL_GPL(nvme_cancel_tagset); void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl) { - nvme_stop_keep_alive(ctrl); if (ctrl->admin_tagset) { blk_mq_tagset_busy_iter(ctrl->admin_tagset, nvme_cancel_request, ctrl); @@ -4355,6 +4354,7 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) { nvme_mpath_stop(ctrl); nvme_auth_stop(ctrl); + nvme_stop_keep_alive(ctrl); nvme_stop_failfast_work(ctrl); flush_work(&ctrl->async_event_work); cancel_work_sync(&ctrl->fw_act_work); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 49c3e46eaa1e..9f9a3b35dc64 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2530,12 +2530,6 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) * clean up the admin queue. Same thing as above. */ nvme_quiesce_admin_queue(&ctrl->ctrl); - - /* - * Open-coding nvme_cancel_admin_tagset() as fc - * is not using nvme_cancel_request(). - */ - nvme_stop_keep_alive(&ctrl->ctrl); blk_sync_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); @@ -3138,11 +3132,12 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) nvme_unquiesce_admin_queue(&ctrl->ctrl); ret = nvme_init_ctrl_finish(&ctrl->ctrl, false); - if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags)) - ret = -EIO; if (ret) goto out_disconnect_admin_queue; - + if (test_bit(ASSOC_FAILED, &ctrl->flags)) { + ret = -EIO; + goto out_stop_keep_alive; + } /* sanity checks */ /* FC-NVME does not have other data in the capsule */ @@ -3150,7 +3145,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", ctrl->ctrl.icdoff); ret = NVME_SC_INVALID_FIELD | NVME_SC_DNR; - goto out_disconnect_admin_queue; + goto out_stop_keep_alive; } /* FC-NVME supports normal SGL Data Block Descriptors */ @@ -3158,7 +3153,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) dev_err(ctrl->ctrl.device, "Mandatory sgls are not supported!\n"); ret = NVME_SC_INVALID_FIELD | NVME_SC_DNR; - goto out_disconnect_admin_queue; + goto out_stop_keep_alive; } if (opts->queue_size > ctrl->ctrl.maxcmd) { @@ -3205,6 +3200,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) out_term_aen_ops: nvme_fc_term_aen_ops(ctrl); +out_stop_keep_alive: + nvme_stop_keep_alive(&ctrl->ctrl); out_disconnect_admin_queue: dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: create_assoc failed, assoc_id %llx ret %d\n", diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index a7fea4cbacd7..6d178d555920 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1080,6 +1080,7 @@ destroy_io: nvme_rdma_free_io_queues(ctrl); } destroy_admin: + nvme_stop_keep_alive(&ctrl->ctrl); nvme_quiesce_admin_queue(&ctrl->ctrl); blk_sync_queue(ctrl->ctrl.admin_q); nvme_rdma_stop_queue(&ctrl->queues[0]); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 6ed794815517..ddcd23fb8b75 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2237,6 +2237,7 @@ destroy_io: nvme_tcp_destroy_io_queues(ctrl, new); } destroy_admin: + nvme_stop_keep_alive(ctrl); nvme_tcp_teardown_admin_queue(ctrl, false); return ret; } From 125b0bb95dd6bec81b806b997a4ccb026eeecf8f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 9 Nov 2023 22:22:13 -0800 Subject: [PATCH 184/234] asm-generic: qspinlock: fix queued_spin_value_unlocked() implementation We really don't want to do atomic_read() or anything like that, since we already have the value, not the lock. The whole point of this is that we've loaded the lock from memory, and we want to check whether the value we loaded was a locked one or not. The main use of this is the lockref code, which loads both the lock and the reference count in one atomic operation, and then works on that combined value. With the atomic_read(), the compiler would pointlessly spill the value to the stack, in order to then be able to read it back "atomically". This is the qspinlock version of commit c6f4a9002252 ("asm-generic: ticket-lock: Optimize arch_spin_value_unlocked()") which fixed this same bug for ticket locks. Cc: Guo Ren Cc: Ingo Molnar Cc: Waiman Long Link: https://lore.kernel.org/all/CAHk-=whNRv0v6kQiV5QO6DJhjH4KEL36vWQ6Re8Csrnh4zbRkQ@mail.gmail.com/ Signed-off-by: Linus Torvalds --- include/asm-generic/qspinlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h index 995513fa2690..0655aa5b57b2 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -70,7 +70,7 @@ static __always_inline int queued_spin_is_locked(struct qspinlock *lock) */ static __always_inline int queued_spin_value_unlocked(struct qspinlock lock) { - return !atomic_read(&lock.val); + return !lock.val.counter; } /** From ed17f7da5f0c8b65b7b5f7c98beb0aadbc0546ee Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 20 Nov 2023 10:31:38 -0800 Subject: [PATCH 185/234] xfs: clean up dqblk extraction Since the introduction of xfs_dqblk in V5, xfs really ought to find the dqblk pointer from the dquot buffer, then compute the xfs_disk_dquot pointer from the dqblk pointer. Fix the open-coded xfs_buf_offset calls and do the type checking in the correct order. Note that this has made no practical difference since the start of the xfs_disk_dquot is coincident with the start of the xfs_dqblk. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/xfs_dquot.c | 5 +++-- fs/xfs/xfs_dquot_item_recover.c | 7 ++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index ac6ba646624d..a013b87ab8d5 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -562,7 +562,8 @@ xfs_dquot_from_disk( struct xfs_dquot *dqp, struct xfs_buf *bp) { - struct xfs_disk_dquot *ddqp = bp->b_addr + dqp->q_bufoffset; + struct xfs_dqblk *dqb = xfs_buf_offset(bp, dqp->q_bufoffset); + struct xfs_disk_dquot *ddqp = &dqb->dd_diskdq; /* * Ensure that we got the type and ID we were looking for. @@ -1250,7 +1251,7 @@ xfs_qm_dqflush( } /* Flush the incore dquot to the ondisk buffer. */ - dqblk = bp->b_addr + dqp->q_bufoffset; + dqblk = xfs_buf_offset(bp, dqp->q_bufoffset); xfs_dquot_to_disk(&dqblk->dd_diskdq, dqp); /* diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c index 8966ba842395..db2cb5e4197b 100644 --- a/fs/xfs/xfs_dquot_item_recover.c +++ b/fs/xfs/xfs_dquot_item_recover.c @@ -65,6 +65,7 @@ xlog_recover_dquot_commit_pass2( { struct xfs_mount *mp = log->l_mp; struct xfs_buf *bp; + struct xfs_dqblk *dqb; struct xfs_disk_dquot *ddq, *recddq; struct xfs_dq_logformat *dq_f; xfs_failaddr_t fa; @@ -130,14 +131,14 @@ xlog_recover_dquot_commit_pass2( return error; ASSERT(bp); - ddq = xfs_buf_offset(bp, dq_f->qlf_boffset); + dqb = xfs_buf_offset(bp, dq_f->qlf_boffset); + ddq = &dqb->dd_diskdq; /* * If the dquot has an LSN in it, recover the dquot only if it's less * than the lsn of the transaction we are replaying. */ if (xfs_has_crc(mp)) { - struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq; xfs_lsn_t lsn = be64_to_cpu(dqb->dd_lsn); if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { @@ -147,7 +148,7 @@ xlog_recover_dquot_commit_pass2( memcpy(ddq, recddq, item->ri_buf[1].i_len); if (xfs_has_crc(mp)) { - xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk), + xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk), XFS_DQUOT_CRC_OFF); } From 9c235dfc3d3f901fe22acb20f2ab37ff39f2ce02 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 20 Nov 2023 10:31:44 -0800 Subject: [PATCH 186/234] xfs: dquot recovery does not validate the recovered dquot When we're recovering ondisk quota records from the log, we need to validate the recovered buffer contents before writing them to disk. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/xfs_dquot_item_recover.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c index db2cb5e4197b..2c2720ce6923 100644 --- a/fs/xfs/xfs_dquot_item_recover.c +++ b/fs/xfs/xfs_dquot_item_recover.c @@ -19,6 +19,7 @@ #include "xfs_log.h" #include "xfs_log_priv.h" #include "xfs_log_recover.h" +#include "xfs_error.h" STATIC void xlog_recover_dquot_ra_pass2( @@ -152,6 +153,19 @@ xlog_recover_dquot_commit_pass2( XFS_DQUOT_CRC_OFF); } + /* Validate the recovered dquot. */ + fa = xfs_dqblk_verify(log->l_mp, dqb, dq_f->qlf_id); + if (fa) { + XFS_CORRUPTION_ERROR("Bad dquot after recovery", + XFS_ERRLEVEL_LOW, mp, dqb, + sizeof(struct xfs_dqblk)); + xfs_alert(mp, + "Metadata corruption detected at %pS, dquot 0x%x", + fa, dq_f->qlf_id); + error = -EFSCORRUPTED; + goto out_release; + } + ASSERT(dq_f->qlf_size == 2); ASSERT(bp->b_mount == mp); bp->b_flags |= _XBF_LOGRECOVERY; From acfa60dbe03802d6afd28401aa47801270e82021 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 17 Nov 2023 13:14:22 +0000 Subject: [PATCH 187/234] arm64: mm: Fix "rodata=on" when CONFIG_RODATA_FULL_DEFAULT_ENABLED=y When CONFIG_RODATA_FULL_DEFAULT_ENABLED=y, passing "rodata=on" on the kernel command-line (rather than "rodata=full") should turn off the "full" behaviour, leaving writable linear aliases of read-only kernel memory. Unfortunately, the option has no effect in this situation and the only way to disable the "rodata=full" behaviour is to disable rodata protection entirely by passing "rodata=off". Fix this by parsing the "on" and "off" options in the arch code, additionally enforcing that 'rodata_full' cannot be set without also setting 'rodata_enabled', allowing us to simplify a couple of checks in the process. Fixes: 2e8cff0a0eee ("arm64: fix rodata=full") Cc: Ard Biesheuvel Cc: Mark Rutland Signed-off-by: Will Deacon Reviewed-by: "Russell King (Oracle)" Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20231117131422.29663-1-will@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/setup.h | 17 +++++++++++++++-- arch/arm64/mm/pageattr.c | 7 +++---- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h index f4af547ef54c..2e4d7da74fb8 100644 --- a/arch/arm64/include/asm/setup.h +++ b/arch/arm64/include/asm/setup.h @@ -21,9 +21,22 @@ static inline bool arch_parse_debug_rodata(char *arg) extern bool rodata_enabled; extern bool rodata_full; - if (arg && !strcmp(arg, "full")) { + if (!arg) + return false; + + if (!strcmp(arg, "full")) { + rodata_enabled = rodata_full = true; + return true; + } + + if (!strcmp(arg, "off")) { + rodata_enabled = rodata_full = false; + return true; + } + + if (!strcmp(arg, "on")) { rodata_enabled = true; - rodata_full = true; + rodata_full = false; return true; } diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 8e2017ba5f1b..924843f1f661 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -29,8 +29,8 @@ bool can_set_direct_map(void) * * KFENCE pool requires page-granular mapping if initialized late. */ - return (rodata_enabled && rodata_full) || debug_pagealloc_enabled() || - arm64_kfence_can_set_direct_map(); + return rodata_full || debug_pagealloc_enabled() || + arm64_kfence_can_set_direct_map(); } static int change_page_range(pte_t *ptep, unsigned long addr, void *data) @@ -105,8 +105,7 @@ static int change_memory_common(unsigned long addr, int numpages, * If we are manipulating read-only permissions, apply the same * change to the linear mapping of the pages that back this VM area. */ - if (rodata_enabled && - rodata_full && (pgprot_val(set_mask) == PTE_RDONLY || + if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY || pgprot_val(clear_mask) == PTE_RDONLY)) { for (i = 0; i < area->nr_pages; i++) { __change_memory_common((u64)page_address(area->pages[i]), From 4763d635c907baed212664dc579dde1663bb2676 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 21 Nov 2023 18:10:04 -0500 Subject: [PATCH 188/234] eventfs: Use GFP_NOFS for allocation when eventfs_mutex is held If memory reclaim happens, it can reclaim file system pages. The file system pages from eventfs may take the eventfs_mutex on reclaim. This means that allocation while holding the eventfs_mutex must not call into filesystem reclaim. A lockdep splat uncovered this. Link: https://lkml.kernel.org/r/20231121231112.373501894@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Fixes: 28e12c09f5aa0 ("eventfs: Save ownership and mode") Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode") Reported-by: Mark Rutland Reviewed-by: Josef Bacik Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 3eb6c622a74d..56d192f0ead8 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -95,7 +95,7 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, if (!(dentry->d_inode->i_mode & S_IFDIR)) { if (!ei->entry_attrs) { ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries, - GFP_KERNEL); + GFP_NOFS); if (!ei->entry_attrs) { ret = -ENOMEM; goto out; @@ -627,7 +627,7 @@ static int add_dentries(struct dentry ***dentries, struct dentry *d, int cnt) { struct dentry **tmp; - tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_KERNEL); + tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_NOFS); if (!tmp) return -1; tmp[cnt] = d; From bcae32c5632fc0a0dbce46fa731cd23403117e66 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 21 Nov 2023 18:10:05 -0500 Subject: [PATCH 189/234] eventfs: Move taking of inode_lock into dcache_dir_open_wrapper() The both create_file_dentry() and create_dir_dentry() takes a boolean parameter "lookup", as on lookup the inode_lock should already be taken, but for dcache_dir_open_wrapper() it is not taken. There's no reason that the dcache_dir_open_wrapper() can't take the inode_lock before calling these functions. In fact, it's better if it does, as the lock can be held throughout both directory and file creations. This also simplifies the code, and possibly prevents unexpected race conditions when the lock is released. Link: https://lkml.kernel.org/r/20231121231112.528544825@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Andrew Morton Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode") Reviewed-by: Josef Bacik Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 56d192f0ead8..590e8176449b 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -347,15 +347,8 @@ create_file_dentry(struct eventfs_inode *ei, int idx, mutex_unlock(&eventfs_mutex); - /* The lookup already has the parent->d_inode locked */ - if (!lookup) - inode_lock(parent->d_inode); - dentry = create_file(name, mode, attr, parent, data, fops); - if (!lookup) - inode_unlock(parent->d_inode); - mutex_lock(&eventfs_mutex); if (IS_ERR_OR_NULL(dentry)) { @@ -453,15 +446,8 @@ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, } mutex_unlock(&eventfs_mutex); - /* The lookup already has the parent->d_inode locked */ - if (!lookup) - inode_lock(parent->d_inode); - dentry = create_dir(ei, parent); - if (!lookup) - inode_unlock(parent->d_inode); - mutex_lock(&eventfs_mutex); if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) { @@ -693,6 +679,7 @@ static int dcache_dir_open_wrapper(struct inode *inode, struct file *file) return -ENOMEM; } + inode_lock(parent->d_inode); list_for_each_entry_srcu(ei_child, &ei->children, list, srcu_read_lock_held(&eventfs_srcu)) { d = create_dir_dentry(ei, ei_child, parent, false); @@ -725,6 +712,7 @@ static int dcache_dir_open_wrapper(struct inode *inode, struct file *file) cnt++; } } + inode_unlock(parent->d_inode); srcu_read_unlock(&eventfs_srcu, idx); ret = dcache_dir_open(inode, file); From fc4561226feaad5fcdcb55646c348d77b8ee69c5 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 21 Nov 2023 18:10:06 -0500 Subject: [PATCH 190/234] eventfs: Do not allow NULL parent to eventfs_start_creating() The eventfs directory is dynamically created via the meta data supplied by the existing trace events. All files and directories in eventfs has a parent. Do not allow NULL to be passed into eventfs_start_creating() as the parent because that should never happen. Warn if it does. Link: https://lkml.kernel.org/r/20231121231112.693841807@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Andrew Morton Reviewed-by: Josef Bacik Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/inode.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 5b54948514fe..ae648deed019 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -509,20 +509,15 @@ struct dentry *eventfs_start_creating(const char *name, struct dentry *parent) struct dentry *dentry; int error; + /* Must always have a parent. */ + if (WARN_ON_ONCE(!parent)) + return ERR_PTR(-EINVAL); + error = simple_pin_fs(&trace_fs_type, &tracefs_mount, &tracefs_mount_count); if (error) return ERR_PTR(error); - /* - * If the parent is not specified, we create it in the root. - * We need the root dentry to do this, which is in the super - * block. A pointer to that is in the struct vfsmount that we - * have around. - */ - if (!parent) - parent = tracefs_mount->mnt_root; - if (unlikely(IS_DEADDIR(parent->d_inode))) dentry = ERR_PTR(-ENOENT); else From f49f950c217bfb40f11662bab39cb388d41e4cfb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 21 Nov 2023 18:10:07 -0500 Subject: [PATCH 191/234] eventfs: Make sure that parent->d_inode is locked in creating files/dirs Since the locking of the parent->d_inode has been moved outside the creation of the files and directories (as it use to be locked via a conditional), add a WARN_ON_ONCE() to the case that it's not locked. Link: https://lkml.kernel.org/r/20231121231112.853962542@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Andrew Morton Reviewed-by: Josef Bacik Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 590e8176449b..0b90869fd805 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -327,6 +327,8 @@ create_file_dentry(struct eventfs_inode *ei, int idx, struct dentry **e_dentry = &ei->d_children[idx]; struct dentry *dentry; + WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); + mutex_lock(&eventfs_mutex); if (ei->is_freed) { mutex_unlock(&eventfs_mutex); @@ -430,6 +432,8 @@ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, { struct dentry *dentry = NULL; + WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); + mutex_lock(&eventfs_mutex); if (pei->is_freed || ei->is_freed) { mutex_unlock(&eventfs_mutex); From 76d9eafff4484547ed9e606c8227ac9799a9f2da Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 15 Nov 2023 10:50:18 -0500 Subject: [PATCH 192/234] MAINTAINERS: TRACING: Add Mathieu Desnoyers as Reviewer In order to make sure I get CC'd on tracing changes for which my input would be relevant, add my name as reviewer of the TRACING subsystem. Link: https://lore.kernel.org/linux-trace-kernel/20231115155018.8236-1-mathieu.desnoyers@efficios.com Acked-by: Masami Hiramatsu (Google) Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index ea790149af79..a2d4ef4d90f6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22078,6 +22078,7 @@ F: drivers/watchdog/tqmx86_wdt.c TRACING M: Steven Rostedt M: Masami Hiramatsu +R: Mathieu Desnoyers L: linux-kernel@vger.kernel.org L: linux-trace-kernel@vger.kernel.org S: Maintained From d78abcbabe7e98bb4baa4dea87550806944790ed Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 22 Nov 2023 23:47:17 +0100 Subject: [PATCH 193/234] nvme: target: fix nvme_keyring_id() references In configurations without CONFIG_NVME_TARGET_TCP_TLS, the keyring code might not be available, or using it will result in a runtime failure: x86_64-linux-ld: vmlinux.o: in function `nvmet_ports_make': configfs.c:(.text+0x100a211): undefined reference to `nvme_keyring_id' Add a check to ensure we only check the keyring if there is a chance of it being used, which avoids both the runtime and link-time problems. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20231122224719.4042108-2-arnd@kernel.org Signed-off-by: Jens Axboe --- drivers/nvme/target/configfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 9eed6e6765ea..e307a044b1a1 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1893,7 +1893,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group, return ERR_PTR(-ENOMEM); } - if (nvme_keyring_id()) { + if (IS_ENABLED(CONFIG_NVME_TARGET_TCP_TLS) && nvme_keyring_id()) { port->keyring = key_lookup(nvme_keyring_id()); if (IS_ERR(port->keyring)) { pr_warn("NVMe keyring not available, disabling TLS\n"); From 65e2a74c44ddfa174b700f5da2d1d29b4ba6639b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 22 Nov 2023 23:47:18 +0100 Subject: [PATCH 194/234] nvme: target: fix Kconfig select statements When the NVME target code is built-in but its TCP frontend is a loadable module, enabling keyring support causes a link failure: x86_64-linux-ld: vmlinux.o: in function `nvmet_ports_make': configfs.c:(.text+0x100a211): undefined reference to `nvme_keyring_id' The problem is that CONFIG_NVME_TARGET_TCP_TLS is a 'bool' symbol that depends on the tristate CONFIG_NVME_TARGET_TCP, so any 'select' from it inherits the state of the tristate symbol rather than the intended CONFIG_NVME_TARGET one that contains the actual call. The same thing is true for CONFIG_KEYS, which itself is required for NVME_KEYRING. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20231122224719.4042108-3-arnd@kernel.org Signed-off-by: Jens Axboe --- drivers/nvme/target/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index 31633da9427c..e1ebc73f3e5e 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -4,6 +4,8 @@ config NVME_TARGET tristate "NVMe Target support" depends on BLOCK depends on CONFIGFS_FS + select NVME_KEYRING if NVME_TARGET_TCP_TLS + select KEYS if NVME_TARGET_TCP_TLS select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY select SGL_ALLOC help @@ -87,9 +89,7 @@ config NVME_TARGET_TCP config NVME_TARGET_TCP_TLS bool "NVMe over Fabrics TCP target TLS encryption support" depends on NVME_TARGET_TCP - select NVME_KEYRING select NET_HANDSHAKE - select KEYS help Enables TLS encryption for the NVMe TCP target using the netlink handshake API. From 0e6c4fe782e683ff55a27fbb10e9c6b5c241533b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 22 Nov 2023 23:47:19 +0100 Subject: [PATCH 195/234] nvme: tcp: fix compile-time checks for TLS mode When CONFIG_NVME_KEYRING is enabled as a loadable module, but the TCP host code is built-in, it fails to link: arm-linux-gnueabi-ld: drivers/nvme/host/tcp.o: in function `nvme_tcp_setup_ctrl': tcp.c:(.text+0x1940): undefined reference to `nvme_tls_psk_default' The problem is that the compile-time conditionals are inconsistent here, using a mix of #ifdef CONFIG_NVME_TCP_TLS, IS_ENABLED(CONFIG_NVME_TCP_TLS) and IS_ENABLED(CONFIG_NVME_KEYRING) checks, with CONFIG_NVME_KEYRING controlling whether the implementation is actually built. Change it to use IS_ENABLED(CONFIG_NVME_KEYRING) checks consistently, which should help readability and make it less error-prone. Combining it with the check for the ctrl->opts->tls flag lets the compiler drop all the TLS code in configurations without this feature, which also helps runtime behavior in addition to avoiding the link failure. To make it possible for the compiler to build the dead code, both the tls_handshake_timeout variable and the TLS specific members of nvme_tcp_queue need to be moved out of the #ifdef block as well, but at least the former of these gets optimized out again. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20231122224719.4042108-4-arnd@kernel.org Signed-off-by: Jens Axboe --- drivers/nvme/host/tcp.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index ddcd23fb8b75..d79811cfa0ce 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -36,11 +36,11 @@ static int so_priority; module_param(so_priority, int, 0644); MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority"); -#ifdef CONFIG_NVME_TCP_TLS /* * TLS handshake timeout */ static int tls_handshake_timeout = 10; +#ifdef CONFIG_NVME_TCP_TLS module_param(tls_handshake_timeout, int, 0644); MODULE_PARM_DESC(tls_handshake_timeout, "nvme TLS handshake timeout in seconds (default 10)"); @@ -161,10 +161,8 @@ struct nvme_tcp_queue { struct ahash_request *snd_hash; __le32 exp_ddgst; __le32 recv_ddgst; -#ifdef CONFIG_NVME_TCP_TLS struct completion tls_complete; int tls_err; -#endif struct page_frag_cache pf_cache; void (*state_change)(struct sock *); @@ -207,6 +205,14 @@ static inline int nvme_tcp_queue_id(struct nvme_tcp_queue *queue) return queue - queue->ctrl->queues; } +static inline bool nvme_tcp_tls(struct nvme_ctrl *ctrl) +{ + if (!IS_ENABLED(CONFIG_NVME_TCP_TLS)) + return 0; + + return ctrl->opts->tls; +} + static inline struct blk_mq_tags *nvme_tcp_tagset(struct nvme_tcp_queue *queue) { u32 queue_idx = nvme_tcp_queue_id(queue); @@ -1412,7 +1418,7 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) memset(&msg, 0, sizeof(msg)); iov.iov_base = icresp; iov.iov_len = sizeof(*icresp); - if (queue->ctrl->ctrl.opts->tls) { + if (nvme_tcp_tls(&queue->ctrl->ctrl)) { msg.msg_control = cbuf; msg.msg_controllen = sizeof(cbuf); } @@ -1424,7 +1430,7 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) goto free_icresp; } ret = -ENOTCONN; - if (queue->ctrl->ctrl.opts->tls) { + if (nvme_tcp_tls(&queue->ctrl->ctrl)) { ctype = tls_get_record_type(queue->sock->sk, (struct cmsghdr *)cbuf); if (ctype != TLS_RECORD_TYPE_DATA) { @@ -1548,7 +1554,6 @@ static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue) queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false); } -#ifdef CONFIG_NVME_TCP_TLS static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid) { struct nvme_tcp_queue *queue = data; @@ -1625,14 +1630,6 @@ static int nvme_tcp_start_tls(struct nvme_ctrl *nctrl, } return ret; } -#else -static int nvme_tcp_start_tls(struct nvme_ctrl *nctrl, - struct nvme_tcp_queue *queue, - key_serial_t pskid) -{ - return -EPROTONOSUPPORT; -} -#endif static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, key_serial_t pskid) @@ -1759,7 +1756,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, } /* If PSKs are configured try to start TLS */ - if (pskid) { + if (IS_ENABLED(CONFIG_NVME_TCP_TLS) && pskid) { ret = nvme_tcp_start_tls(nctrl, queue, pskid); if (ret) goto err_init_connect; @@ -1916,7 +1913,7 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl) int ret; key_serial_t pskid = 0; - if (IS_ENABLED(CONFIG_NVME_TCP_TLS) && ctrl->opts->tls) { + if (nvme_tcp_tls(ctrl)) { if (ctrl->opts->tls_key) pskid = key_serial(ctrl->opts->tls_key); else @@ -1949,7 +1946,7 @@ static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) { int i, ret; - if (ctrl->opts->tls && !ctrl->tls_key) { + if (nvme_tcp_tls(ctrl) && !ctrl->tls_key) { dev_err(ctrl->device, "no PSK negotiated\n"); return -ENOKEY; } From 99360d9620f09fb8bc15548d855011bbb198c680 Mon Sep 17 00:00:00 2001 From: Lech Perczak Date: Sat, 18 Nov 2023 00:19:18 +0100 Subject: [PATCH 196/234] net: usb: qmi_wwan: claim interface 4 for ZTE MF290 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Interface 4 is used by for QMI interface in stock firmware of MF28D, the router which uses MF290 modem. Rebind it to qmi_wwan after freeing it up from option driver. The proper configuration is: Interface mapping is: 0: QCDM, 1: (unknown), 2: AT (PCUI), 2: AT (Modem), 4: QMI T: Bus=01 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=19d2 ProdID=0189 Rev= 0.00 S: Manufacturer=ZTE, Incorporated S: Product=ZTE LTE Technologies MSM C:* #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=84(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=86(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms Cc: Bjørn Mork Signed-off-by: Lech Perczak Link: https://lore.kernel.org/r/20231117231918.100278-3-lech.perczak@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 344af3c5c836..e2e181378f41 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1289,6 +1289,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x0168, 4)}, {QMI_FIXED_INTF(0x19d2, 0x0176, 3)}, {QMI_FIXED_INTF(0x19d2, 0x0178, 3)}, + {QMI_FIXED_INTF(0x19d2, 0x0189, 4)}, /* ZTE MF290 */ {QMI_FIXED_INTF(0x19d2, 0x0191, 4)}, /* ZTE EuFi890 */ {QMI_FIXED_INTF(0x19d2, 0x0199, 1)}, /* ZTE MF820S */ {QMI_FIXED_INTF(0x19d2, 0x0200, 1)}, From 7bf9a6b46549852a37e6d07e52c601c3c706b562 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 22 Nov 2023 15:07:41 -0800 Subject: [PATCH 197/234] arm/xen: fix xen_vcpu_info allocation alignment xen_vcpu_info is a percpu area than needs to be mapped by Xen. Currently, it could cross a page boundary resulting in Xen being unable to map it: [ 0.567318] kernel BUG at arch/arm64/xen/../../arm/xen/enlighten.c:164! [ 0.574002] Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP Fix the issue by using __alloc_percpu and requesting alignment for the memory allocation. Signed-off-by: Stefano Stabellini Link: https://lore.kernel.org/r/alpine.DEB.2.22.394.2311221501340.2053963@ubuntu-linux-20-04-desktop Fixes: 24d5373dda7c ("arm/xen: Use alloc_percpu rather than __alloc_percpu") Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- arch/arm/xen/enlighten.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 9afdc4c4a5dc..a395b6c0aae2 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -484,7 +484,8 @@ static int __init xen_guest_init(void) * for secondary CPUs as they are brought up. * For uniformity we use VCPUOP_register_vcpu_info even on cpu0. */ - xen_vcpu_info = alloc_percpu(struct vcpu_info); + xen_vcpu_info = __alloc_percpu(sizeof(struct vcpu_info), + 1 << fls(sizeof(struct vcpu_info) - 1)); if (xen_vcpu_info == NULL) return -ENOMEM; From 4aa1d8f89b10cdc25a231dabf808d8935e0b137a Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Tue, 21 Nov 2023 22:26:24 +0530 Subject: [PATCH 198/234] octeontx2-pf: Fix ntuple rule creation to direct packet to VF with higher Rx queue than its PF It is possible to add a ntuple rule which would like to direct packet to a VF whose number of queues are greater/less than its PF's queue numbers. For example a PF can have 2 Rx queues but a VF created on that PF can have 8 Rx queues. As of today, ntuple rule will reject rule because it is checking the requested queue number against PF's number of Rx queues. As a part of this fix if the action of a ntuple rule is to move a packet to a VF's queue then the check is removed. Also, a debug information is printed to aware user that it is user's responsibility to cross check if the requested queue number on that VF is a valid one. Fixes: f0a1913f8a6f ("octeontx2-pf: Add support for ethtool ntuple filters") Signed-off-by: Suman Ghosh Reviewed-by: Wojciech Drewek Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20231121165624.3664182-1-sumang@marvell.com Signed-off-by: Paolo Abeni --- .../marvell/octeontx2/nic/otx2_flows.c | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c index 4762dbea64a1..97a71e9b8563 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@ -1088,6 +1088,7 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc) struct ethhdr *eth_hdr; bool new = false; int err = 0; + u64 vf_num; u32 ring; if (!flow_cfg->max_flows) { @@ -1100,7 +1101,21 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc) if (!(pfvf->flags & OTX2_FLAG_NTUPLE_SUPPORT)) return -ENOMEM; - if (ring >= pfvf->hw.rx_queues && fsp->ring_cookie != RX_CLS_FLOW_DISC) + /* Number of queues on a VF can be greater or less than + * the PF's queue. Hence no need to check for the + * queue count. Hence no need to check queue count if PF + * is installing for its VF. Below is the expected vf_num value + * based on the ethtool commands. + * + * e.g. + * 1. ethtool -U ... action -1 ==> vf_num:255 + * 2. ethtool -U ... action ==> vf_num:0 + * 3. ethtool -U ... vf queue ==> + * vf_num:vf_idx+1 + */ + vf_num = ethtool_get_flow_spec_ring_vf(fsp->ring_cookie); + if (!is_otx2_vf(pfvf->pcifunc) && !vf_num && + ring >= pfvf->hw.rx_queues && fsp->ring_cookie != RX_CLS_FLOW_DISC) return -EINVAL; if (fsp->location >= otx2_get_maxflows(flow_cfg)) @@ -1182,6 +1197,9 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc) flow_cfg->nr_flows++; } + if (flow->is_vf) + netdev_info(pfvf->netdev, + "Make sure that VF's queue number is within its queue limit\n"); return 0; } From 818ad9cc90d4a7165caaee7e32800c50d0564ec3 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 21 Nov 2023 20:08:44 +0100 Subject: [PATCH 199/234] net: veth: fix ethtool stats reporting Fix a possible misalignment between page_pool stats and tx xdp_stats reported in veth_get_ethtool_stats routine. The issue can be reproduced configuring the veth pair with the following tx/rx queues: $ip link add v0 numtxqueues 2 numrxqueues 4 type veth peer name v1 \ numtxqueues 1 numrxqueues 1 and loading a simple XDP program on v0 that just returns XDP_PASS. In this case on v0 the page_pool stats overwrites tx xdp_stats for queue 1. Fix the issue incrementing pp_idx of dev->real_num_tx_queues * VETH_TQ_STATS_LEN since we always report xdp_stats for all tx queues in ethtool. Fixes: 4fc418053ec7 ("net: veth: add page_pool stats") Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/c5b5d0485016836448453f12846c7c4ab75b094a.1700593593.git.lorenzo@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/veth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 6cc352296c67..57efb3454c57 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -236,8 +236,8 @@ static void veth_get_ethtool_stats(struct net_device *dev, data[tx_idx + j] += *(u64 *)(base + offset); } } while (u64_stats_fetch_retry(&rq_stats->syncp, start)); - pp_idx = tx_idx + VETH_TQ_STATS_LEN; } + pp_idx = idx + dev->real_num_tx_queues * VETH_TQ_STATS_LEN; page_pool_stats: veth_get_page_pool_stats(dev, &data[pp_idx]); From 676ec53844cbdf2f47e68a076cdff7f0ec6cbe3f Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 22 Nov 2023 00:44:33 +0530 Subject: [PATCH 200/234] amd-xgbe: handle corner-case during sfp hotplug Force the mode change for SFI in Fixed PHY configurations. Fixed PHY configurations needs PLL to be enabled while doing mode set. When the SFP module isn't connected during boot, driver assumes AN is ON and attempts auto-negotiation. However, if the connected SFP comes up in Fixed PHY configuration the link will not come up as PLL isn't enabled while the initial mode set command is issued. So, force the mode change for SFI in Fixed PHY configuration to fix link issues. Fixes: e57f7a3feaef ("amd-xgbe: Prepare for working with more than one type of phy") Acked-by: Shyam Sundar S K Signed-off-by: Raju Rangoju Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 32d2c6fac652..4a2dc705b528 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -1193,7 +1193,19 @@ static int xgbe_phy_config_fixed(struct xgbe_prv_data *pdata) if (pdata->phy.duplex != DUPLEX_FULL) return -EINVAL; - xgbe_set_mode(pdata, mode); + /* Force the mode change for SFI in Fixed PHY config. + * Fixed PHY configs needs PLL to be enabled while doing mode set. + * When the SFP module isn't connected during boot, driver assumes + * AN is ON and attempts autonegotiation. However, if the connected + * SFP comes up in Fixed PHY config, the link will not come up as + * PLL isn't enabled while the initial mode set command is issued. + * So, force the mode change for SFI in Fixed PHY configuration to + * fix link issues. + */ + if (mode == XGBE_MODE_SFI) + xgbe_change_mode(pdata, mode); + else + xgbe_set_mode(pdata, mode); return 0; } From 7121205d5330c6a3cb3379348886d47c77b78d06 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 22 Nov 2023 00:44:34 +0530 Subject: [PATCH 201/234] amd-xgbe: handle the corner-case during tx completion The existing implementation uses software logic to accumulate tx completions until the specified time (1ms) is met and then poll them. However, there exists a tiny gap which leads to a race between resetting and checking the tx_activate flag. Due to this the tx completions are not reported to upper layer and tx queue timeout kicks-in restarting the device. To address this, introduce a tx cleanup mechanism as part of the periodic maintenance process. Fixes: c5aa9e3b8156 ("amd-xgbe: Initial AMD 10GbE platform driver") Acked-by: Shyam Sundar S K Signed-off-by: Raju Rangoju Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 614c0278419b..6b73648b3779 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -682,10 +682,24 @@ static void xgbe_service(struct work_struct *work) static void xgbe_service_timer(struct timer_list *t) { struct xgbe_prv_data *pdata = from_timer(pdata, t, service_timer); + struct xgbe_channel *channel; + unsigned int i; queue_work(pdata->dev_workqueue, &pdata->service_work); mod_timer(&pdata->service_timer, jiffies + HZ); + + if (!pdata->tx_usecs) + return; + + for (i = 0; i < pdata->channel_count; i++) { + channel = pdata->channel[i]; + if (!channel->tx_ring || channel->tx_timer_active) + break; + channel->tx_timer_active = 1; + mod_timer(&channel->tx_timer, + jiffies + usecs_to_jiffies(pdata->tx_usecs)); + } } static void xgbe_init_timers(struct xgbe_prv_data *pdata) From 7a2323ac24a50311f64a3a9b54ed5bef5821ecae Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 22 Nov 2023 00:44:35 +0530 Subject: [PATCH 202/234] amd-xgbe: propagate the correct speed and duplex status xgbe_get_link_ksettings() does not propagate correct speed and duplex information to ethtool during cable unplug. Due to which ethtool reports incorrect values for speed and duplex. Address this by propagating correct information. Fixes: 7c12aa08779c ("amd-xgbe: Move the PHY support into amd-xgbe") Acked-by: Shyam Sundar S K Signed-off-by: Raju Rangoju Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c index 6e83ff59172a..32fab5e77246 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c @@ -314,10 +314,15 @@ static int xgbe_get_link_ksettings(struct net_device *netdev, cmd->base.phy_address = pdata->phy.address; - cmd->base.autoneg = pdata->phy.autoneg; - cmd->base.speed = pdata->phy.speed; - cmd->base.duplex = pdata->phy.duplex; + if (netif_carrier_ok(netdev)) { + cmd->base.speed = pdata->phy.speed; + cmd->base.duplex = pdata->phy.duplex; + } else { + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; + } + cmd->base.autoneg = pdata->phy.autoneg; cmd->base.port = PORT_NONE; XGBE_LM_COPY(cmd, supported, lks, supported); From 460e462d22542adfafd8a5bc979437df73f1cbf3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 16 Nov 2023 12:52:29 +0000 Subject: [PATCH 203/234] kselftest/arm64: Fix output formatting for za-fork The za-fork test does not output a newline when reporting the result of the one test it runs, causing the counts printed by kselftest to be included in the test name. Add the newline. Fixes: 266679ffd867 ("kselftest/arm64: Convert za-fork to use kselftest.h") Cc: # 6.4.x Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20231116-arm64-fix-za-fork-output-v1-1-42c03d4f5759@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/za-fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/fp/za-fork.c b/tools/testing/selftests/arm64/fp/za-fork.c index b86cb1049497..587b94648222 100644 --- a/tools/testing/selftests/arm64/fp/za-fork.c +++ b/tools/testing/selftests/arm64/fp/za-fork.c @@ -85,7 +85,7 @@ int main(int argc, char **argv) */ ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0); if (ret >= 0) { - ksft_test_result(fork_test(), "fork_test"); + ksft_test_result(fork_test(), "fork_test\n"); } else { ksft_print_msg("SME not supported\n"); From 0ffb08b1a45bd6b7694e01da0e1d9e3e788418fb Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 21 Nov 2023 13:12:55 -0800 Subject: [PATCH 204/234] ice: remove ptp_tx ring parameter flag Before performing a Tx timestamp in ice_stamp(), the driver checks a ptp_tx ring variable to see if timestamping is enabled on that ring. This value is set for all rings whenever userspace configures Tx timestamping. Ostensibly this was done to avoid wasting cycles checking other fields when timestamping has not been enabled. However, for Tx timestamps we already get an individual per-SKB flag indicating whether userspace wants to request a timestamp on that packet. We do not gain much by also having a separate flag to check for whether timestamping was enabled. In fact, the driver currently fails to restore the field after a PF reset. Because of this, if a PF reset occurs, timestamps will be disabled. Since this flag doesn't add value in the hotpath, remove it and always provide a timestamp if the SKB flag has been set. A following change will fix the reset path to properly restore user timestamping configuration completely. This went unnoticed for some time because one of the most common applications using Tx timestamps, ptp4l, will reconfigure the socket as part of its fault recovery logic. Fixes: ea9b847cda64 ("ice: enable transmit timestamps for E810 devices") Signed-off-by: Jacob Keller Reviewed-by: Jesse Brandeburg Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/ice/ice_ptp.c | 14 -------------- drivers/net/ethernet/intel/ice/ice_txrx.c | 3 --- drivers/net/ethernet/intel/ice/ice_txrx.h | 1 - 3 files changed, 18 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 1eddcbe89b0c..affd90622a68 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -280,20 +280,6 @@ static void ice_ptp_configure_tx_tstamp(struct ice_pf *pf, bool on) */ static void ice_set_tx_tstamp(struct ice_pf *pf, bool on) { - struct ice_vsi *vsi; - u16 i; - - vsi = ice_get_main_vsi(pf); - if (!vsi) - return; - - /* Set the timestamp enable flag for all the Tx rings */ - ice_for_each_txq(vsi, i) { - if (!vsi->tx_rings[i]) - continue; - vsi->tx_rings[i]->ptp_tx = on; - } - if (pf->ptp.tx_interrupt_mode == ICE_PTP_TX_INTERRUPT_SELF) ice_ptp_configure_tx_tstamp(pf, on); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 52d0a126eb61..9e97ea863068 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -2306,9 +2306,6 @@ ice_tstamp(struct ice_tx_ring *tx_ring, struct sk_buff *skb, if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) return; - if (!tx_ring->ptp_tx) - return; - /* Tx timestamps cannot be sampled when doing TSO */ if (first->tx_flags & ICE_TX_FLAGS_TSO) return; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 166413fc33f4..daf7b9dbb143 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -380,7 +380,6 @@ struct ice_tx_ring { #define ICE_TX_FLAGS_RING_VLAN_L2TAG2 BIT(2) u8 flags; u8 dcb_tc; /* Traffic class of ring */ - u8 ptp_tx; } ____cacheline_internodealigned_in_smp; static inline bool ice_ring_uses_build_skb(struct ice_rx_ring *ring) From 7d606a1e2d0575b6c3a2600f43f90d1e409f9661 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 21 Nov 2023 13:12:56 -0800 Subject: [PATCH 205/234] ice: unify logic for programming PFINT_TSYN_MSK Commit d938a8cca88a ("ice: Auxbus devices & driver for E822 TS") modified how Tx timestamps are handled for E822 devices. On these devices, only the clock owner handles reading the Tx timestamp data from firmware. To do this, the PFINT_TSYN_MSK register is modified from the default value to one which enables reacting to a Tx timestamp on all PHY ports. The driver currently programs PFINT_TSYN_MSK in different places depending on whether the port is the clock owner or not. For the clock owner, the PFINT_TSYN_MSK value is programmed during ice_ptp_init_owner just before calling ice_ptp_tx_ena_intr to program the PHY ports. For the non-clock owner ports, the PFINT_TSYN_MSK is programmed during ice_ptp_init_port. If a large enough device reset occurs, the PFINT_TSYN_MSK register will be reset to the default value in which only the PHY associated directly with the PF will cause the Tx timestamp interrupt to trigger. The driver lacks logic to reprogram the PFINT_TSYN_MSK register after a device reset. For the E822 device, this results in the PF no longer responding to interrupts for other ports. This results in failure to deliver Tx timestamps to user space applications. Rename ice_ptp_configure_tx_tstamp to ice_ptp_cfg_tx_interrupt, and unify the logic for programming PFINT_TSYN_MSK and PFINT_OICR_ENA into one place. This function will program both registers according to the combination of user configuration and device requirements. This ensures that PFINT_TSYN_MSK is always restored when we configure the Tx timestamp interrupt. Fixes: d938a8cca88a ("ice: Auxbus devices & driver for E822 TS") Signed-off-by: Jacob Keller Reviewed-by: Jesse Brandeburg Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/ice/ice_ptp.c | 60 ++++++++++++++---------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index affd90622a68..624d05b4bbd9 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -256,21 +256,42 @@ ice_verify_pin_e810t(struct ptp_clock_info *info, unsigned int pin, } /** - * ice_ptp_configure_tx_tstamp - Enable or disable Tx timestamp interrupt - * @pf: The PF pointer to search in - * @on: bool value for whether timestamp interrupt is enabled or disabled + * ice_ptp_cfg_tx_interrupt - Configure Tx timestamp interrupt for the device + * @pf: Board private structure + * + * Program the device to respond appropriately to the Tx timestamp interrupt + * cause. */ -static void ice_ptp_configure_tx_tstamp(struct ice_pf *pf, bool on) +static void ice_ptp_cfg_tx_interrupt(struct ice_pf *pf) { + struct ice_hw *hw = &pf->hw; + bool enable; u32 val; + switch (pf->ptp.tx_interrupt_mode) { + case ICE_PTP_TX_INTERRUPT_ALL: + /* React to interrupts across all quads. */ + wr32(hw, PFINT_TSYN_MSK + (0x4 * hw->pf_id), (u32)0x1f); + enable = true; + break; + case ICE_PTP_TX_INTERRUPT_NONE: + /* Do not react to interrupts on any quad. */ + wr32(hw, PFINT_TSYN_MSK + (0x4 * hw->pf_id), (u32)0x0); + enable = false; + break; + case ICE_PTP_TX_INTERRUPT_SELF: + default: + enable = pf->ptp.tstamp_config.tx_type == HWTSTAMP_TX_ON; + break; + } + /* Configure the Tx timestamp interrupt */ - val = rd32(&pf->hw, PFINT_OICR_ENA); - if (on) + val = rd32(hw, PFINT_OICR_ENA); + if (enable) val |= PFINT_OICR_TSYN_TX_M; else val &= ~PFINT_OICR_TSYN_TX_M; - wr32(&pf->hw, PFINT_OICR_ENA, val); + wr32(hw, PFINT_OICR_ENA, val); } /** @@ -280,10 +301,9 @@ static void ice_ptp_configure_tx_tstamp(struct ice_pf *pf, bool on) */ static void ice_set_tx_tstamp(struct ice_pf *pf, bool on) { - if (pf->ptp.tx_interrupt_mode == ICE_PTP_TX_INTERRUPT_SELF) - ice_ptp_configure_tx_tstamp(pf, on); - pf->ptp.tstamp_config.tx_type = on ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; + + ice_ptp_cfg_tx_interrupt(pf); } /** @@ -2789,15 +2809,7 @@ static int ice_ptp_init_owner(struct ice_pf *pf) /* Release the global hardware lock */ ice_ptp_unlock(hw); - if (pf->ptp.tx_interrupt_mode == ICE_PTP_TX_INTERRUPT_ALL) { - /* The clock owner for this device type handles the timestamp - * interrupt for all ports. - */ - ice_ptp_configure_tx_tstamp(pf, true); - - /* React on all quads interrupts for E82x */ - wr32(hw, PFINT_TSYN_MSK + (0x4 * hw->pf_id), (u32)0x1f); - + if (!ice_is_e810(hw)) { /* Enable quad interrupts */ err = ice_ptp_tx_ena_intr(pf, true, itr); if (err) @@ -2867,13 +2879,6 @@ static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port) case ICE_PHY_E810: return ice_ptp_init_tx_e810(pf, &ptp_port->tx); case ICE_PHY_E822: - /* Non-owner PFs don't react to any interrupts on E82x, - * neither on own quad nor on others - */ - if (!ice_ptp_pf_handles_tx_interrupt(pf)) { - ice_ptp_configure_tx_tstamp(pf, false); - wr32(hw, PFINT_TSYN_MSK + (0x4 * hw->pf_id), (u32)0x0); - } kthread_init_delayed_work(&ptp_port->ov_work, ice_ptp_wait_for_offsets); @@ -3018,6 +3023,9 @@ void ice_ptp_init(struct ice_pf *pf) /* Start the PHY timestamping block */ ice_ptp_reset_phy_timestamping(pf); + /* Configure initial Tx interrupt settings */ + ice_ptp_cfg_tx_interrupt(pf); + set_bit(ICE_FLAG_PTP, pf->flags); err = ice_ptp_init_work(pf, ptp); if (err) From 7758017911a4f2578d54c318e8fe77bcb5899054 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 21 Nov 2023 13:12:57 -0800 Subject: [PATCH 206/234] ice: restore timestamp configuration after device reset The driver calls ice_ptp_cfg_timestamp() during ice_ptp_prepare_for_reset() to disable timestamping while the device is resetting. This operation destroys the user requested configuration. While the driver does call ice_ptp_cfg_timestamp in ice_rebuild() to restore some hardware settings after a reset, it unconditionally passes true or false, resulting in failure to restore previous user space configuration. This results in a device reset forcibly disabling timestamp configuration regardless of current user settings. This was not detected previously due to a quirk of the LinuxPTP ptp4l application. If ptp4l detects a missing timestamp, it enters a fault state and performs recovery logic which includes executing SIOCSHWTSTAMP again, restoring the now accidentally cleared configuration. Not every application does this, and for these applications, timestamps will mysteriously stop after a PF reset, without being restored until an application restart. Fix this by replacing ice_ptp_cfg_timestamp() with two new functions: 1) ice_ptp_disable_timestamp_mode() which unconditionally disables the timestamping logic in ice_ptp_prepare_for_reset() and ice_ptp_release() 2) ice_ptp_restore_timestamp_mode() which calls ice_ptp_restore_tx_interrupt() to restore Tx timestamping configuration, calls ice_set_rx_tstamp() to restore Rx timestamping configuration, and issues an immediate TSYN_TX interrupt to ensure that timestamps which may have occurred during the device reset get processed. Modify the ice_ptp_set_timestamp_mode to directly save the user configuration and then call ice_ptp_restore_timestamp_mode. This way, reset no longer destroys the saved user configuration. This obsoletes the ice_set_tx_tstamp() function which can now be safely removed. With this change, all devices should now restore Tx and Rx timestamping functionality correctly after a PF reset without application intervention. Fixes: 77a781155a65 ("ice: enable receive hardware timestamping") Fixes: ea9b847cda64 ("ice: enable transmit timestamps for E810 devices") Signed-off-by: Jacob Keller Reviewed-by: Jesse Brandeburg Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/ice/ice_main.c | 12 +--- drivers/net/ethernet/intel/ice/ice_ptp.c | 76 ++++++++++++++--------- drivers/net/ethernet/intel/ice/ice_ptp.h | 5 +- 3 files changed, 52 insertions(+), 41 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 6607fa6fe556..fb9c93f37e84 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -7401,15 +7401,6 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) goto err_vsi_rebuild; } - /* configure PTP timestamping after VSI rebuild */ - if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) { - if (pf->ptp.tx_interrupt_mode == ICE_PTP_TX_INTERRUPT_SELF) - ice_ptp_cfg_timestamp(pf, false); - else if (pf->ptp.tx_interrupt_mode == ICE_PTP_TX_INTERRUPT_ALL) - /* for E82x PHC owner always need to have interrupts */ - ice_ptp_cfg_timestamp(pf, true); - } - err = ice_vsi_rebuild_by_type(pf, ICE_VSI_SWITCHDEV_CTRL); if (err) { dev_err(dev, "Switchdev CTRL VSI rebuild failed: %d\n", err); @@ -7461,6 +7452,9 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) ice_plug_aux_dev(pf); if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG)) ice_lag_rebuild(pf); + + /* Restore timestamp mode settings after VSI rebuild */ + ice_ptp_restore_timestamp_mode(pf); return; err_vsi_rebuild: diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 624d05b4bbd9..71f405f8a6fe 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -294,18 +294,6 @@ static void ice_ptp_cfg_tx_interrupt(struct ice_pf *pf) wr32(hw, PFINT_OICR_ENA, val); } -/** - * ice_set_tx_tstamp - Enable or disable Tx timestamping - * @pf: The PF pointer to search in - * @on: bool value for whether timestamps are enabled or disabled - */ -static void ice_set_tx_tstamp(struct ice_pf *pf, bool on) -{ - pf->ptp.tstamp_config.tx_type = on ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; - - ice_ptp_cfg_tx_interrupt(pf); -} - /** * ice_set_rx_tstamp - Enable or disable Rx timestamping * @pf: The PF pointer to search in @@ -317,7 +305,7 @@ static void ice_set_rx_tstamp(struct ice_pf *pf, bool on) u16 i; vsi = ice_get_main_vsi(pf); - if (!vsi) + if (!vsi || !vsi->rx_rings) return; /* Set the timestamp flag for all the Rx rings */ @@ -326,23 +314,50 @@ static void ice_set_rx_tstamp(struct ice_pf *pf, bool on) continue; vsi->rx_rings[i]->ptp_rx = on; } - - pf->ptp.tstamp_config.rx_filter = on ? HWTSTAMP_FILTER_ALL : - HWTSTAMP_FILTER_NONE; } /** - * ice_ptp_cfg_timestamp - Configure timestamp for init/deinit + * ice_ptp_disable_timestamp_mode - Disable current timestamp mode * @pf: Board private structure - * @ena: bool value to enable or disable time stamp * - * This function will configure timestamping during PTP initialization - * and deinitialization + * Called during preparation for reset to temporarily disable timestamping on + * the device. Called during remove to disable timestamping while cleaning up + * driver resources. */ -void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena) +static void ice_ptp_disable_timestamp_mode(struct ice_pf *pf) { - ice_set_tx_tstamp(pf, ena); - ice_set_rx_tstamp(pf, ena); + struct ice_hw *hw = &pf->hw; + u32 val; + + val = rd32(hw, PFINT_OICR_ENA); + val &= ~PFINT_OICR_TSYN_TX_M; + wr32(hw, PFINT_OICR_ENA, val); + + ice_set_rx_tstamp(pf, false); +} + +/** + * ice_ptp_restore_timestamp_mode - Restore timestamp configuration + * @pf: Board private structure + * + * Called at the end of rebuild to restore timestamp configuration after + * a device reset. + */ +void ice_ptp_restore_timestamp_mode(struct ice_pf *pf) +{ + struct ice_hw *hw = &pf->hw; + bool enable_rx; + + ice_ptp_cfg_tx_interrupt(pf); + + enable_rx = pf->ptp.tstamp_config.rx_filter == HWTSTAMP_FILTER_ALL; + ice_set_rx_tstamp(pf, enable_rx); + + /* Trigger an immediate software interrupt to ensure that timestamps + * which occurred during reset are handled now. + */ + wr32(hw, PFINT_OICR, PFINT_OICR_TSYN_TX_M); + ice_flush(hw); } /** @@ -2043,10 +2058,10 @@ ice_ptp_set_timestamp_mode(struct ice_pf *pf, struct hwtstamp_config *config) { switch (config->tx_type) { case HWTSTAMP_TX_OFF: - ice_set_tx_tstamp(pf, false); + pf->ptp.tstamp_config.tx_type = HWTSTAMP_TX_OFF; break; case HWTSTAMP_TX_ON: - ice_set_tx_tstamp(pf, true); + pf->ptp.tstamp_config.tx_type = HWTSTAMP_TX_ON; break; default: return -ERANGE; @@ -2054,7 +2069,7 @@ ice_ptp_set_timestamp_mode(struct ice_pf *pf, struct hwtstamp_config *config) switch (config->rx_filter) { case HWTSTAMP_FILTER_NONE: - ice_set_rx_tstamp(pf, false); + pf->ptp.tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; break; case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: @@ -2070,12 +2085,15 @@ ice_ptp_set_timestamp_mode(struct ice_pf *pf, struct hwtstamp_config *config) case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: case HWTSTAMP_FILTER_NTP_ALL: case HWTSTAMP_FILTER_ALL: - ice_set_rx_tstamp(pf, true); + pf->ptp.tstamp_config.rx_filter = HWTSTAMP_FILTER_ALL; break; default: return -ERANGE; } + /* Immediately update the device timestamping mode */ + ice_ptp_restore_timestamp_mode(pf); + return 0; } @@ -2743,7 +2761,7 @@ void ice_ptp_prepare_for_reset(struct ice_pf *pf) clear_bit(ICE_FLAG_PTP, pf->flags); /* Disable timestamping for both Tx and Rx */ - ice_ptp_cfg_timestamp(pf, false); + ice_ptp_disable_timestamp_mode(pf); kthread_cancel_delayed_work_sync(&ptp->work); @@ -3061,7 +3079,7 @@ void ice_ptp_release(struct ice_pf *pf) return; /* Disable timestamping for both Tx and Rx */ - ice_ptp_cfg_timestamp(pf, false); + ice_ptp_disable_timestamp_mode(pf); ice_ptp_remove_auxbus_device(pf); diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index 8f6f94392756..06a330867fc9 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -292,7 +292,7 @@ int ice_ptp_clock_index(struct ice_pf *pf); struct ice_pf; int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr); int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr); -void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena); +void ice_ptp_restore_timestamp_mode(struct ice_pf *pf); void ice_ptp_extts_event(struct ice_pf *pf); s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb); @@ -317,8 +317,7 @@ static inline int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr) return -EOPNOTSUPP; } -static inline void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena) { } - +static inline void ice_ptp_restore_timestamp_mode(struct ice_pf *pf) { } static inline void ice_ptp_extts_event(struct ice_pf *pf) { } static inline s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) From 4e20655e503e3a478cd1682bf25e3202dd823da8 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Tue, 21 Nov 2023 13:13:36 -0800 Subject: [PATCH 207/234] i40e: Fix adding unsupported cloud filters If a VF tries to add unsupported cloud filter through virtchnl then i40e_add_del_cloud_filter(_big_buf) returns -ENOTSUPP but this error code is stored in 'ret' instead of 'aq_ret' that is used as error code sent back to VF. In this scenario where one of the mentioned functions fails the value of 'aq_ret' is zero so the VF will incorrectly receive a 'success'. Use 'aq_ret' to store return value and remove 'ret' local variable. Additionally fix the issue when filter allocation fails, in this case no notification is sent back to the VF. Fixes: e284fc280473 ("i40e: Add and delete cloud filter") Reviewed-by: Simon Horman Signed-off-by: Ivan Vecera Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20231121211338.3348677-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 08d7edccfb8d..3f99eb198245 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -3844,7 +3844,7 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi = NULL; int aq_ret = 0; - int i, ret; + int i; if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = -EINVAL; @@ -3868,8 +3868,10 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) } cfilter = kzalloc(sizeof(*cfilter), GFP_KERNEL); - if (!cfilter) - return -ENOMEM; + if (!cfilter) { + aq_ret = -ENOMEM; + goto err_out; + } /* parse destination mac address */ for (i = 0; i < ETH_ALEN; i++) @@ -3917,13 +3919,13 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) /* Adding cloud filter programmed as TC filter */ if (tcf.dst_port) - ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, true); + aq_ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, true); else - ret = i40e_add_del_cloud_filter(vsi, cfilter, true); - if (ret) { + aq_ret = i40e_add_del_cloud_filter(vsi, cfilter, true); + if (aq_ret) { dev_err(&pf->pdev->dev, "VF %d: Failed to add cloud filter, err %pe aq_err %s\n", - vf->vf_id, ERR_PTR(ret), + vf->vf_id, ERR_PTR(aq_ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); goto err_free; } From f0863888f6cfef33e3117dccfe94fa78edf76be4 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Wed, 22 Nov 2023 00:16:42 +0300 Subject: [PATCH 208/234] vsock/test: fix SEQPACKET message bounds test Tune message length calculation to make this test work on machines where 'getpagesize()' returns >32KB. Now maximum message length is not hardcoded (on machines above it was smaller than 'getpagesize()' return value, thus we get negative value and test fails), but calculated at runtime and always bigger than 'getpagesize()' result. Reproduced on aarch64 with 64KB page size. Fixes: 5c338112e48a ("test/vsock: rework message bounds test") Signed-off-by: Arseniy Krasnov Reported-by: Bogdan Marcynkov Reviewed-by: Stefano Garzarella Link: https://lore.kernel.org/r/20231121211642.163474-1-avkrasnov@salutedevices.com Signed-off-by: Jakub Kicinski --- tools/testing/vsock/vsock_test.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 5b0e93f9996c..01fa816868bc 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -353,11 +353,12 @@ static void test_stream_msg_peek_server(const struct test_opts *opts) } #define SOCK_BUF_SIZE (2 * 1024 * 1024) -#define MAX_MSG_SIZE (32 * 1024) +#define MAX_MSG_PAGES 4 static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) { unsigned long curr_hash; + size_t max_msg_size; int page_size; int msg_count; int fd; @@ -373,7 +374,8 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) curr_hash = 0; page_size = getpagesize(); - msg_count = SOCK_BUF_SIZE / MAX_MSG_SIZE; + max_msg_size = MAX_MSG_PAGES * page_size; + msg_count = SOCK_BUF_SIZE / max_msg_size; for (int i = 0; i < msg_count; i++) { size_t buf_size; @@ -383,7 +385,7 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) /* Use "small" buffers and "big" buffers. */ if (i & 1) buf_size = page_size + - (rand() % (MAX_MSG_SIZE - page_size)); + (rand() % (max_msg_size - page_size)); else buf_size = 1 + (rand() % page_size); @@ -429,7 +431,6 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) unsigned long remote_hash; unsigned long curr_hash; int fd; - char buf[MAX_MSG_SIZE]; struct msghdr msg = {0}; struct iovec iov = {0}; @@ -457,8 +458,13 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) control_writeln("SRVREADY"); /* Wait, until peer sends whole data. */ control_expectln("SENDDONE"); - iov.iov_base = buf; - iov.iov_len = sizeof(buf); + iov.iov_len = MAX_MSG_PAGES * getpagesize(); + iov.iov_base = malloc(iov.iov_len); + if (!iov.iov_base) { + perror("malloc"); + exit(EXIT_FAILURE); + } + msg.msg_iov = &iov; msg.msg_iovlen = 1; @@ -483,6 +489,7 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) curr_hash += hash_djb2(msg.msg_iov[0].iov_base, recv_size); } + free(iov.iov_base); close(fd); remote_hash = control_readulong(); From fd0413bbf8b11f56e8aa842783b0deda0dfe2926 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 21 Nov 2023 16:42:17 -0800 Subject: [PATCH 209/234] net: axienet: Fix check for partial TX checksum Due to a typo, the code checked the RX checksum feature in the TX path. Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") Signed-off-by: Samuel Holland Reviewed-by: Andrew Lunn Reviewed-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/20231122004219.3504219-1-samuel.holland@sifive.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 82d0d44b2b02..bf6e33990490 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -822,7 +822,7 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) if (lp->features & XAE_FEATURE_FULL_TX_CSUM) { /* Tx Full Checksum Offload Enabled */ cur_p->app0 |= 2; - } else if (lp->features & XAE_FEATURE_PARTIAL_RX_CSUM) { + } else if (lp->features & XAE_FEATURE_PARTIAL_TX_CSUM) { csum_start_off = skb_transport_offset(skb); csum_index_off = csum_start_off + skb->csum_offset; /* Tx Partial Checksum Offload Enabled */ From 53f2cb491b500897a619ff6abd72f565933760f0 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 22 Nov 2023 22:44:47 +0100 Subject: [PATCH 210/234] tls: fix NULL deref on tls_sw_splice_eof() with empty record syzkaller discovered that if tls_sw_splice_eof() is executed as part of sendfile() when the plaintext/ciphertext sk_msg are empty, the send path gets confused because the empty ciphertext buffer does not have enough space for the encryption overhead. This causes tls_push_record() to go on the `split = true` path (which is only supposed to be used when interacting with an attached BPF program), and then get further confused and hit the tls_merge_open_record() path, which then assumes that there must be at least one populated buffer element, leading to a NULL deref. It is possible to have empty plaintext/ciphertext buffers if we previously bailed from tls_sw_sendmsg_locked() via the tls_trim_both_msgs() path. tls_sw_push_pending_record() already handles this case correctly; let's do the same check in tls_sw_splice_eof(). Fixes: df720d288dbb ("tls/sw: Use splice_eof() to flush") Cc: stable@vger.kernel.org Reported-by: syzbot+40d43509a099ea756317@syzkaller.appspotmail.com Signed-off-by: Jann Horn Link: https://lore.kernel.org/r/20231122214447.675768-1-jannh@google.com Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index a78e8e722409..316f76187962 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1232,11 +1232,14 @@ void tls_sw_splice_eof(struct socket *sock) lock_sock(sk); retry: + /* same checks as in tls_sw_push_pending_record() */ rec = ctx->open_rec; if (!rec) goto unlock; msg_pl = &rec->msg_plaintext; + if (msg_pl->sg.size == 0) + goto unlock; /* Check the BPF advisor and perform transmission. */ ret = bpf_exec_tx_verdict(msg_pl, sk, false, TLS_RECORD_TYPE_DATA, From 37f0205538baf70beb57cdcb6c7d14aa13257926 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 22 Nov 2023 17:17:08 -0600 Subject: [PATCH 211/234] net: ipa: fix one GSI register field width The width of the R_LENGTH field of the EV_CH_E_CNTXT_1 GSI register is 24 bits (not 20 bits) starting with IPA v5.0. Fix this. Fixes: faf0678ec8a0 ("net: ipa: add IPA v5.0 GSI register definitions") Signed-off-by: Alex Elder Link: https://lore.kernel.org/r/20231122231708.896632-1-elder@linaro.org Signed-off-by: Jakub Kicinski --- drivers/net/ipa/reg/gsi_reg-v5.0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipa/reg/gsi_reg-v5.0.c b/drivers/net/ipa/reg/gsi_reg-v5.0.c index d7b81a36d673..145eb0bd096d 100644 --- a/drivers/net/ipa/reg/gsi_reg-v5.0.c +++ b/drivers/net/ipa/reg/gsi_reg-v5.0.c @@ -78,7 +78,7 @@ REG_STRIDE_FIELDS(EV_CH_E_CNTXT_0, ev_ch_e_cntxt_0, 0x0001c000 + 0x12000 * GSI_EE_AP, 0x80); static const u32 reg_ev_ch_e_cntxt_1_fmask[] = { - [R_LENGTH] = GENMASK(19, 0), + [R_LENGTH] = GENMASK(23, 0), }; REG_STRIDE_FIELDS(EV_CH_E_CNTXT_1, ev_ch_e_cntxt_1, From 2be35a619482c1f4e5bc7a2d84049b8d7d171882 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 22 Nov 2023 19:06:24 -0800 Subject: [PATCH 212/234] tools: ynl: fix header path for nfsd The makefile dependency is trying to include the wrong header: : fatal error: ../../../../include/uapi//linux/nfsd.h: No such file or directory The guard also looks wrong. Fixes: f14122b2c2ac ("tools: ynl: Add source files for nfsd netlink protocol") Reviewed-by: Chuck Lever Link: https://lore.kernel.org/r/20231123030624.1611925-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/Makefile.deps | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps index 64d139400db1..3110f84dd029 100644 --- a/tools/net/ynl/Makefile.deps +++ b/tools/net/ynl/Makefile.deps @@ -18,4 +18,4 @@ CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h) CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h) CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h) -CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_H,nfsd.h) +CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h) From 39f04b1406b23fcc129a67e70d6205d5a7322f38 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 22 Nov 2023 19:05:58 -0800 Subject: [PATCH 213/234] tools: ynl: fix duplicate op name in devlink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't support CRUD-inspired message types in YNL too well. One aspect that currently trips us up is the fact that single message ID can be used in multiple commands (as the response). This leads to duplicate entries in the id-to-string tables: devlink-user.c:19:34: warning: initialized field overwritten [-Woverride-init] 19 | [DEVLINK_CMD_PORT_NEW] = "port-new", | ^~~~~~~~~~ devlink-user.c:19:34: note: (near initialization for ‘devlink_op_strmap[7]’) Fixes tag points at where the code was generated, the "real" problem is that the code generator does not support CRUD. Fixes: f2f9dd164db0 ("netlink: specs: devlink: add the remaining command to generate complete split_ops") Link: https://lore.kernel.org/r/20231123030558.1611831-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/generated/devlink-user.c | 2 +- tools/net/ynl/ynl-gen-c.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/net/ynl/generated/devlink-user.c b/tools/net/ynl/generated/devlink-user.c index bc5065bd99b2..c12ca87ca2bb 100644 --- a/tools/net/ynl/generated/devlink-user.c +++ b/tools/net/ynl/generated/devlink-user.c @@ -15,7 +15,7 @@ /* Enums */ static const char * const devlink_op_strmap[] = { [3] = "get", - [7] = "port-get", + // skip "port-get", duplicate reply value [DEVLINK_CMD_PORT_NEW] = "port-new", [13] = "sb-get", [17] = "sb-pool-get", diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index c4003a83cd5d..3bd6b928c14f 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -1505,6 +1505,12 @@ def put_op_name(family, cw): cw.block_start(line=f"static const char * const {map_name}[] =") for op_name, op in family.msgs.items(): if op.rsp_value: + # Make sure we don't add duplicated entries, if multiple commands + # produce the same response in legacy families. + if family.rsp_by_value[op.rsp_value] != op: + cw.p(f'// skip "{op_name}", duplicate reply value') + continue + if op.req_value == op.rsp_value: cw.p(f'[{op.enum_name}] = "{op_name}",') else: From da90e45d5afc4da2de7cd3ea7943d0f1baa47cc2 Mon Sep 17 00:00:00 2001 From: Asuna Yang Date: Wed, 22 Nov 2023 22:18:03 +0800 Subject: [PATCH 214/234] USB: serial: option: add Luat Air72*U series products Update the USB serial option driver support for Luat Air72*U series products. ID 1782:4e00 Spreadtrum Communications Inc. UNISOC-8910 T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 13 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1782 ProdID=4e00 Rev=00.00 S: Manufacturer=UNISOC S: Product=UNISOC-8910 C: #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=400mA I: If#= 0 Alt= 0 #EPs= 1 Cls=e0(wlcon) Sub=01 Prot=03 Driver=rndis_host E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=4096ms I: If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms If#= 2: AT If#= 3: PPP + AT If#= 4: Debug Co-developed-by: Yangyu Chen Signed-off-by: Yangyu Chen Signed-off-by: Asuna Yang Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 06b9b04c022a..4dffcfefd62d 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -609,6 +609,8 @@ static void option_instat_callback(struct urb *urb); #define UNISOC_VENDOR_ID 0x1782 /* TOZED LT70-C based on UNISOC SL8563 uses UNISOC's vendor ID */ #define TOZED_PRODUCT_LT70C 0x4055 +/* Luat Air72*U series based on UNISOC UIS8910 uses UNISOC's vendor ID */ +#define LUAT_PRODUCT_AIR720U 0x4e00 /* Device flags */ @@ -2273,6 +2275,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, LUAT_PRODUCT_AIR720U, 0xff, 0, 0) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); From a15ccef82d3de9a37dc25898c60a394209368dc8 Mon Sep 17 00:00:00 2001 From: Ritvik Budhiraja Date: Tue, 21 Nov 2023 19:13:47 +0530 Subject: [PATCH 215/234] cifs: fix use after free for iface while disabling secondary channels We were deferencing iface after it has been released. Fix is to release after all dereference instances have been encountered. Signed-off-by: Ritvik Budhiraja Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202311110815.UJaeU3Tt-lkp@intel.com/ Signed-off-by: Steve French --- fs/smb/client/sess.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 8b2d7c1ca428..816e01c5589b 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -332,10 +332,10 @@ cifs_disable_secondary_channels(struct cifs_ses *ses) if (iface) { spin_lock(&ses->iface_lock); - kref_put(&iface->refcount, release_iface); iface->num_channels--; if (iface->weight_fulfilled) iface->weight_fulfilled--; + kref_put(&iface->refcount, release_iface); spin_unlock(&ses->iface_lock); } From ed3e0a149b58ea8cfd10cc4f7cefb39877ff07ac Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 21 Nov 2023 20:12:52 -0300 Subject: [PATCH 216/234] smb: client: implement ->query_reparse_point() for SMB1 Reparse points are not limited to symlinks, so implement ->query_reparse_point() in order to handle different file types. Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/smb/client/cifspdu.h | 2 +- fs/smb/client/cifsproto.h | 9 ++ fs/smb/client/cifssmb.c | 191 +++++++++++++++----------------------- fs/smb/client/smb1ops.c | 49 ++-------- fs/smb/client/smb2ops.c | 35 ++++--- 5 files changed, 112 insertions(+), 174 deletions(-) diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index a75220db5c1e..2a90134331a4 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -1356,7 +1356,7 @@ typedef struct smb_com_transaction_ioctl_rsp { __le32 DataDisplacement; __u8 SetupCount; /* 1 */ __le16 ReturnedDataLen; - __u16 ByteCount; + __le16 ByteCount; } __attribute__((packed)) TRANSACT_IOCTL_RSP; #define CIFS_ACL_OWNER 1 diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index d87e2c26cce2..8a739c10d634 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -458,6 +458,12 @@ extern int CIFSSMBUnixQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, char **syminfo, const struct nls_table *nls_codepage, int remap); +extern int cifs_query_reparse_point(const unsigned int xid, + struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, + const char *full_path, + u32 *tag, struct kvec *rsp, + int *rsp_buftype); extern int CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, char **symlinkinfo, const struct nls_table *nls_codepage); @@ -659,6 +665,9 @@ void cifs_put_tcp_super(struct super_block *sb); int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix); char *extract_hostname(const char *unc); char *extract_sharename(const char *unc); +int parse_reparse_point(struct reparse_data_buffer *buf, + u32 plen, struct cifs_sb_info *cifs_sb, + bool unicode, char **target_path); #ifdef CONFIG_CIFS_DFS_UPCALL static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses, diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 25503f1a4fd2..bad91ba6c3a9 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -2690,136 +2690,97 @@ querySymLinkRetry: return rc; } -/* - * Recent Windows versions now create symlinks more frequently - * and they use the "reparse point" mechanism below. We can of course - * do symlinks nicely to Samba and other servers which support the - * CIFS Unix Extensions and we can also do SFU symlinks and "client only" - * "MF" symlinks optionally, but for recent Windows we really need to - * reenable the code below and fix the cifs_symlink callers to handle this. - * In the interim this code has been moved to its own config option so - * it is not compiled in by default until callers fixed up and more tested. - */ -int -CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, - __u16 fid, char **symlinkinfo, - const struct nls_table *nls_codepage) +int cifs_query_reparse_point(const unsigned int xid, + struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, + const char *full_path, + u32 *tag, struct kvec *rsp, + int *rsp_buftype) { - int rc = 0; - int bytes_returned; - struct smb_com_transaction_ioctl_req *pSMB; - struct smb_com_transaction_ioctl_rsp *pSMBr; - bool is_unicode; - unsigned int sub_len; - char *sub_start; - struct reparse_symlink_data *reparse_buf; - struct reparse_posix_data *posix_buf; + struct cifs_open_parms oparms; + TRANSACT_IOCTL_REQ *io_req = NULL; + TRANSACT_IOCTL_RSP *io_rsp = NULL; + struct cifs_fid fid; __u32 data_offset, data_count; - char *end_of_smb; + __u8 *start, *end; + int io_rsp_len; + int oplock = 0; + int rc; - cifs_dbg(FYI, "In Windows reparse style QueryLink for fid %u\n", fid); - rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB, - (void **) &pSMBr); + cifs_tcon_dbg(FYI, "%s: path=%s\n", __func__, full_path); + + if (cap_unix(tcon->ses)) + return -EOPNOTSUPP; + + oparms = (struct cifs_open_parms) { + .tcon = tcon, + .cifs_sb = cifs_sb, + .desired_access = FILE_READ_ATTRIBUTES, + .create_options = cifs_create_options(cifs_sb, + OPEN_REPARSE_POINT), + .disposition = FILE_OPEN, + .path = full_path, + .fid = &fid, + }; + + rc = CIFS_open(xid, &oparms, &oplock, NULL); if (rc) return rc; - pSMB->TotalParameterCount = 0 ; - pSMB->TotalDataCount = 0; - pSMB->MaxParameterCount = cpu_to_le32(2); + rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, + (void **)&io_req, (void **)&io_rsp); + if (rc) + goto error; + + io_req->TotalParameterCount = 0; + io_req->TotalDataCount = 0; + io_req->MaxParameterCount = cpu_to_le32(2); /* BB find exact data count max from sess structure BB */ - pSMB->MaxDataCount = cpu_to_le32(CIFSMaxBufSize & 0xFFFFFF00); - pSMB->MaxSetupCount = 4; - pSMB->Reserved = 0; - pSMB->ParameterOffset = 0; - pSMB->DataCount = 0; - pSMB->DataOffset = 0; - pSMB->SetupCount = 4; - pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL); - pSMB->ParameterCount = pSMB->TotalParameterCount; - pSMB->FunctionCode = cpu_to_le32(FSCTL_GET_REPARSE_POINT); - pSMB->IsFsctl = 1; /* FSCTL */ - pSMB->IsRootFlag = 0; - pSMB->Fid = fid; /* file handle always le */ - pSMB->ByteCount = 0; + io_req->MaxDataCount = cpu_to_le32(CIFSMaxBufSize & 0xFFFFFF00); + io_req->MaxSetupCount = 4; + io_req->Reserved = 0; + io_req->ParameterOffset = 0; + io_req->DataCount = 0; + io_req->DataOffset = 0; + io_req->SetupCount = 4; + io_req->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL); + io_req->ParameterCount = io_req->TotalParameterCount; + io_req->FunctionCode = cpu_to_le32(FSCTL_GET_REPARSE_POINT); + io_req->IsFsctl = 1; + io_req->IsRootFlag = 0; + io_req->Fid = fid.netfid; + io_req->ByteCount = 0; - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, - (struct smb_hdr *) pSMBr, &bytes_returned, 0); - if (rc) { - cifs_dbg(FYI, "Send error in QueryReparseLinkInfo = %d\n", rc); - goto qreparse_out; - } + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *)io_req, + (struct smb_hdr *)io_rsp, &io_rsp_len, 0); + if (rc) + goto error; - data_offset = le32_to_cpu(pSMBr->DataOffset); - data_count = le32_to_cpu(pSMBr->DataCount); - if (get_bcc(&pSMBr->hdr) < 2 || data_offset > 512) { - /* BB also check enough total bytes returned */ - rc = -EIO; /* bad smb */ - goto qreparse_out; - } - if (!data_count || (data_count > 2048)) { + data_offset = le32_to_cpu(io_rsp->DataOffset); + data_count = le32_to_cpu(io_rsp->DataCount); + if (get_bcc(&io_rsp->hdr) < 2 || data_offset > 512 || + !data_count || data_count > 2048) { rc = -EIO; - cifs_dbg(FYI, "Invalid return data count on get reparse info ioctl\n"); - goto qreparse_out; + goto error; } - end_of_smb = 2 + get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount; - reparse_buf = (struct reparse_symlink_data *) - ((char *)&pSMBr->hdr.Protocol + data_offset); - if ((char *)reparse_buf >= end_of_smb) { + + end = 2 + get_bcc(&io_rsp->hdr) + (__u8 *)&io_rsp->ByteCount; + start = (__u8 *)&io_rsp->hdr.Protocol + data_offset; + if (start >= end) { rc = -EIO; - goto qreparse_out; - } - if (reparse_buf->ReparseTag == cpu_to_le32(IO_REPARSE_TAG_NFS)) { - cifs_dbg(FYI, "NFS style reparse tag\n"); - posix_buf = (struct reparse_posix_data *)reparse_buf; - - if (posix_buf->InodeType != cpu_to_le64(NFS_SPECFILE_LNK)) { - cifs_dbg(FYI, "unsupported file type 0x%llx\n", - le64_to_cpu(posix_buf->InodeType)); - rc = -EOPNOTSUPP; - goto qreparse_out; - } - is_unicode = true; - sub_len = le16_to_cpu(reparse_buf->ReparseDataLength); - if (posix_buf->PathBuffer + sub_len > end_of_smb) { - cifs_dbg(FYI, "reparse buf beyond SMB\n"); - rc = -EIO; - goto qreparse_out; - } - *symlinkinfo = cifs_strndup_from_utf16(posix_buf->PathBuffer, - sub_len, is_unicode, nls_codepage); - goto qreparse_out; - } else if (reparse_buf->ReparseTag != - cpu_to_le32(IO_REPARSE_TAG_SYMLINK)) { - rc = -EOPNOTSUPP; - goto qreparse_out; + goto error; } - /* Reparse tag is NTFS symlink */ - sub_start = le16_to_cpu(reparse_buf->SubstituteNameOffset) + - reparse_buf->PathBuffer; - sub_len = le16_to_cpu(reparse_buf->SubstituteNameLength); - if (sub_start + sub_len > end_of_smb) { - cifs_dbg(FYI, "reparse buf beyond SMB\n"); - rc = -EIO; - goto qreparse_out; - } - if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) - is_unicode = true; - else - is_unicode = false; + *tag = le32_to_cpu(((struct reparse_data_buffer *)start)->ReparseTag); + rsp->iov_base = io_rsp; + rsp->iov_len = io_rsp_len; + *rsp_buftype = CIFS_LARGE_BUFFER; + CIFSSMBClose(xid, tcon, fid.netfid); + return 0; - /* BB FIXME investigate remapping reserved chars here */ - *symlinkinfo = cifs_strndup_from_utf16(sub_start, sub_len, is_unicode, - nls_codepage); - if (!*symlinkinfo) - rc = -ENOMEM; -qreparse_out: - cifs_buf_release(pSMB); - - /* - * Note: On -EAGAIN error only caller can retry on handle based calls - * since file handle passed in no longer valid. - */ +error: + cifs_buf_release(io_req); + CIFSSMBClose(xid, tcon, fid.netfid); return rc; } diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 9bf8735cdd1e..6b4d8effa79d 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -979,18 +979,13 @@ static int cifs_query_symlink(const unsigned int xid, char **target_path, struct kvec *rsp_iov) { + struct reparse_data_buffer *buf; + TRANSACT_IOCTL_RSP *io = rsp_iov->iov_base; + bool unicode = !!(io->hdr.Flags2 & SMBFLG2_UNICODE); + u32 plen = le16_to_cpu(io->ByteCount); int rc; - int oplock = 0; - bool is_reparse_point = !!rsp_iov; - struct cifs_fid fid; - struct cifs_open_parms oparms; - cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); - - if (is_reparse_point) { - cifs_dbg(VFS, "reparse points not handled for SMB1 symlinks\n"); - return -EOPNOTSUPP; - } + cifs_tcon_dbg(FYI, "%s: path=%s\n", __func__, full_path); /* Check for unix extensions */ if (cap_unix(tcon->ses)) { @@ -1001,37 +996,12 @@ static int cifs_query_symlink(const unsigned int xid, rc = cifs_unix_dfs_readlink(xid, tcon, full_path, target_path, cifs_sb->local_nls); - - goto out; + return rc; } - oparms = (struct cifs_open_parms) { - .tcon = tcon, - .cifs_sb = cifs_sb, - .desired_access = FILE_READ_ATTRIBUTES, - .create_options = cifs_create_options(cifs_sb, - OPEN_REPARSE_POINT), - .disposition = FILE_OPEN, - .path = full_path, - .fid = &fid, - }; - - rc = CIFS_open(xid, &oparms, &oplock, NULL); - if (rc) - goto out; - - rc = CIFSSMBQuerySymLink(xid, tcon, fid.netfid, target_path, - cifs_sb->local_nls); - if (rc) - goto out_close; - - convert_delimiter(*target_path, '/'); -out_close: - CIFSSMBClose(xid, tcon, fid.netfid); -out: - if (!rc) - cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path); - return rc; + buf = (struct reparse_data_buffer *)((__u8 *)&io->hdr.Protocol + + le32_to_cpu(io->DataOffset)); + return parse_reparse_point(buf, plen, cifs_sb, unicode, target_path); } static bool @@ -1214,6 +1184,7 @@ struct smb_version_operations smb1_operations = { .is_path_accessible = cifs_is_path_accessible, .can_echo = cifs_can_echo, .query_path_info = cifs_query_path_info, + .query_reparse_point = cifs_query_reparse_point, .query_file_info = cifs_query_file_info, .get_srv_inum = cifs_get_srv_inum, .set_path_size = CIFSSMBSetEOF, diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index a959ed2c9b22..e9c8cff0b1d2 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -2894,27 +2894,26 @@ parse_reparse_posix(struct reparse_posix_data *symlink_buf, return 0; } -static int -parse_reparse_symlink(struct reparse_symlink_data_buffer *symlink_buf, - u32 plen, char **target_path, - struct cifs_sb_info *cifs_sb) +static int parse_reparse_symlink(struct reparse_symlink_data_buffer *sym, + u32 plen, bool unicode, char **target_path, + struct cifs_sb_info *cifs_sb) { unsigned int sub_len; unsigned int sub_offset; /* We handle Symbolic Link reparse tag here. See: MS-FSCC 2.1.2.4 */ - sub_offset = le16_to_cpu(symlink_buf->SubstituteNameOffset); - sub_len = le16_to_cpu(symlink_buf->SubstituteNameLength); + sub_offset = le16_to_cpu(sym->SubstituteNameOffset); + sub_len = le16_to_cpu(sym->SubstituteNameLength); if (sub_offset + 20 > plen || sub_offset + sub_len + 20 > plen) { cifs_dbg(VFS, "srv returned malformed symlink buffer\n"); return -EIO; } - *target_path = cifs_strndup_from_utf16( - symlink_buf->PathBuffer + sub_offset, - sub_len, true, cifs_sb->local_nls); + *target_path = cifs_strndup_from_utf16(sym->PathBuffer + sub_offset, + sub_len, unicode, + cifs_sb->local_nls); if (!(*target_path)) return -ENOMEM; @@ -2924,19 +2923,17 @@ parse_reparse_symlink(struct reparse_symlink_data_buffer *symlink_buf, return 0; } -static int -parse_reparse_point(struct reparse_data_buffer *buf, - u32 plen, char **target_path, - struct cifs_sb_info *cifs_sb) +int parse_reparse_point(struct reparse_data_buffer *buf, + u32 plen, struct cifs_sb_info *cifs_sb, + bool unicode, char **target_path) { - if (plen < sizeof(struct reparse_data_buffer)) { + if (plen < sizeof(*buf)) { cifs_dbg(VFS, "reparse buffer is too small. Must be at least 8 bytes but was %d\n", plen); return -EIO; } - if (plen < le16_to_cpu(buf->ReparseDataLength) + - sizeof(struct reparse_data_buffer)) { + if (plen < le16_to_cpu(buf->ReparseDataLength) + sizeof(*buf)) { cifs_dbg(VFS, "srv returned invalid reparse buf length: %d\n", plen); return -EIO; @@ -2951,7 +2948,7 @@ parse_reparse_point(struct reparse_data_buffer *buf, case IO_REPARSE_TAG_SYMLINK: return parse_reparse_symlink( (struct reparse_symlink_data_buffer *)buf, - plen, target_path, cifs_sb); + plen, unicode, target_path, cifs_sb); default: cifs_dbg(VFS, "srv returned unknown symlink buffer tag:0x%08x\n", le32_to_cpu(buf->ReparseTag)); @@ -2970,11 +2967,11 @@ static int smb2_query_symlink(const unsigned int xid, struct smb2_ioctl_rsp *io = rsp_iov->iov_base; u32 plen = le32_to_cpu(io->OutputCount); - cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); + cifs_tcon_dbg(FYI, "%s: path: %s\n", __func__, full_path); buf = (struct reparse_data_buffer *)((u8 *)io + le32_to_cpu(io->OutputOffset)); - return parse_reparse_point(buf, plen, target_path, cifs_sb); + return parse_reparse_point(buf, plen, cifs_sb, true, target_path); } static int smb2_query_reparse_point(const unsigned int xid, From 539aad7f14dab7f947e5ab81901c0b20513a50db Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 21 Nov 2023 20:12:53 -0300 Subject: [PATCH 217/234] smb: client: introduce ->parse_reparse_point() Parse reparse point into cifs_open_info_data structure and feed it through cifs_open_info_to_fattr(). Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 6 ++++-- fs/smb/client/inode.c | 23 +++++++++++++--------- fs/smb/client/smb1ops.c | 41 ++++++++++++++++++++++------------------ fs/smb/client/smb2ops.c | 28 ++++++++++++++------------- 4 files changed, 56 insertions(+), 42 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 6ffbd81bd109..111daa4ff261 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -395,8 +395,7 @@ struct smb_version_operations { struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, - char **target_path, - struct kvec *rsp_iov); + char **target_path); /* open a file for non-posix mounts */ int (*open)(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock, void *buf); @@ -551,6 +550,9 @@ struct smb_version_operations { bool (*is_status_io_timeout)(char *buf); /* Check for STATUS_NETWORK_NAME_DELETED */ bool (*is_network_name_deleted)(char *buf, struct TCP_Server_Info *srv); + int (*parse_reparse_point)(struct cifs_sb_info *cifs_sb, + struct kvec *rsp_iov, + struct cifs_open_info_data *data); }; struct smb_version_values { diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 86fbd3f847d6..dd482de3dc3f 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -459,8 +459,7 @@ static int cifs_get_unix_fattr(const unsigned char *full_path, return -EOPNOTSUPP; rc = server->ops->query_symlink(xid, tcon, cifs_sb, full_path, - &fattr->cf_symlink_target, - NULL); + &fattr->cf_symlink_target); cifs_dbg(FYI, "%s: query_symlink: %d\n", __func__, rc); } return rc; @@ -1035,22 +1034,28 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, if (!rc) iov = &rsp_iov; } + + rc = -EOPNOTSUPP; switch ((data->reparse_tag = tag)) { case 0: /* SMB1 symlink */ - iov = NULL; - fallthrough; - case IO_REPARSE_TAG_NFS: - case IO_REPARSE_TAG_SYMLINK: - if (!data->symlink_target && server->ops->query_symlink) { + if (server->ops->query_symlink) { rc = server->ops->query_symlink(xid, tcon, cifs_sb, full_path, - &data->symlink_target, - iov); + &data->symlink_target); } break; case IO_REPARSE_TAG_MOUNT_POINT: cifs_create_junction_fattr(fattr, sb); + rc = 0; goto out; + default: + if (data->symlink_target) { + rc = 0; + } else if (server->ops->parse_reparse_point) { + rc = server->ops->parse_reparse_point(cifs_sb, + iov, data); + } + break; } cifs_open_info_to_fattr(fattr, data, sb); diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 6b4d8effa79d..0dd599004e04 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -976,32 +976,36 @@ static int cifs_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, - char **target_path, - struct kvec *rsp_iov) + char **target_path) +{ + int rc; + + cifs_tcon_dbg(FYI, "%s: path=%s\n", __func__, full_path); + + if (!cap_unix(tcon->ses)) + return -EOPNOTSUPP; + + rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, target_path, + cifs_sb->local_nls, cifs_remap(cifs_sb)); + if (rc == -EREMOTE) + rc = cifs_unix_dfs_readlink(xid, tcon, full_path, + target_path, cifs_sb->local_nls); + return rc; +} + +static int cifs_parse_reparse_point(struct cifs_sb_info *cifs_sb, + struct kvec *rsp_iov, + struct cifs_open_info_data *data) { struct reparse_data_buffer *buf; TRANSACT_IOCTL_RSP *io = rsp_iov->iov_base; bool unicode = !!(io->hdr.Flags2 & SMBFLG2_UNICODE); u32 plen = le16_to_cpu(io->ByteCount); - int rc; - - cifs_tcon_dbg(FYI, "%s: path=%s\n", __func__, full_path); - - /* Check for unix extensions */ - if (cap_unix(tcon->ses)) { - rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, target_path, - cifs_sb->local_nls, - cifs_remap(cifs_sb)); - if (rc == -EREMOTE) - rc = cifs_unix_dfs_readlink(xid, tcon, full_path, - target_path, - cifs_sb->local_nls); - return rc; - } buf = (struct reparse_data_buffer *)((__u8 *)&io->hdr.Protocol + le32_to_cpu(io->DataOffset)); - return parse_reparse_point(buf, plen, cifs_sb, unicode, target_path); + return parse_reparse_point(buf, plen, cifs_sb, unicode, + &data->symlink_target); } static bool @@ -1200,6 +1204,7 @@ struct smb_version_operations smb1_operations = { .rename = CIFSSMBRename, .create_hardlink = CIFSCreateHardLink, .query_symlink = cifs_query_symlink, + .parse_reparse_point = cifs_parse_reparse_point, .open = cifs_open_file, .set_fid = cifs_set_fid, .close = cifs_close_file, diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index e9c8cff0b1d2..2955eaa51d4d 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -2949,6 +2949,12 @@ int parse_reparse_point(struct reparse_data_buffer *buf, return parse_reparse_symlink( (struct reparse_symlink_data_buffer *)buf, plen, unicode, target_path, cifs_sb); + case IO_REPARSE_TAG_LX_SYMLINK: + case IO_REPARSE_TAG_AF_UNIX: + case IO_REPARSE_TAG_LX_FIFO: + case IO_REPARSE_TAG_LX_CHR: + case IO_REPARSE_TAG_LX_BLK: + return 0; default: cifs_dbg(VFS, "srv returned unknown symlink buffer tag:0x%08x\n", le32_to_cpu(buf->ReparseTag)); @@ -2956,22 +2962,18 @@ int parse_reparse_point(struct reparse_data_buffer *buf, } } -static int smb2_query_symlink(const unsigned int xid, - struct cifs_tcon *tcon, - struct cifs_sb_info *cifs_sb, - const char *full_path, - char **target_path, - struct kvec *rsp_iov) +static int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb, + struct kvec *rsp_iov, + struct cifs_open_info_data *data) { struct reparse_data_buffer *buf; struct smb2_ioctl_rsp *io = rsp_iov->iov_base; u32 plen = le32_to_cpu(io->OutputCount); - cifs_tcon_dbg(FYI, "%s: path: %s\n", __func__, full_path); - buf = (struct reparse_data_buffer *)((u8 *)io + le32_to_cpu(io->OutputOffset)); - return parse_reparse_point(buf, plen, cifs_sb, true, target_path); + return parse_reparse_point(buf, plen, cifs_sb, + true, &data->symlink_target); } static int smb2_query_reparse_point(const unsigned int xid, @@ -5206,7 +5208,7 @@ struct smb_version_operations smb20_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, - .query_symlink = smb2_query_symlink, + .parse_reparse_point = smb2_parse_reparse_point, .query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, .open = smb2_open_file, @@ -5308,7 +5310,7 @@ struct smb_version_operations smb21_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, - .query_symlink = smb2_query_symlink, + .parse_reparse_point = smb2_parse_reparse_point, .query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, .open = smb2_open_file, @@ -5413,7 +5415,7 @@ struct smb_version_operations smb30_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, - .query_symlink = smb2_query_symlink, + .parse_reparse_point = smb2_parse_reparse_point, .query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, .open = smb2_open_file, @@ -5527,7 +5529,7 @@ struct smb_version_operations smb311_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, - .query_symlink = smb2_query_symlink, + .parse_reparse_point = smb2_parse_reparse_point, .query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, .open = smb2_open_file, From 45e724022e2704b5a5193fd96f378822b0448e07 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 21 Nov 2023 20:12:54 -0300 Subject: [PATCH 218/234] smb: client: set correct file type from NFS reparse points Handle all file types in NFS reparse points as specified in MS-FSCC 2.1.2.6 Network File System (NFS) Reparse Data Buffer. The client is now able to set all file types based on the parsed NFS reparse point, which used to support only symlinks. This works for SMB1+. Before patch: $ mount.cifs //srv/share /mnt -o ... $ ls -l /mnt ls: cannot access 'block': Operation not supported ls: cannot access 'char': Operation not supported ls: cannot access 'fifo': Operation not supported ls: cannot access 'sock': Operation not supported total 1 l????????? ? ? ? ? ? block l????????? ? ? ? ? ? char -rwxr-xr-x 1 root root 5 Nov 18 23:22 f0 l????????? ? ? ? ? ? fifo l--------- 1 root root 0 Nov 18 23:23 link -> f0 l????????? ? ? ? ? ? sock After patch: $ mount.cifs //srv/share /mnt -o ... $ ls -l /mnt total 1 brwxr-xr-x 1 root root 123, 123 Nov 18 00:34 block crwxr-xr-x 1 root root 1234, 1234 Nov 18 00:33 char -rwxr-xr-x 1 root root 5 Nov 18 23:22 f0 prwxr-xr-x 1 root root 0 Nov 18 23:23 fifo lrwxr-xr-x 1 root root 0 Nov 18 23:23 link -> f0 srwxr-xr-x 1 root root 0 Nov 19 2023 sock Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 8 +++- fs/smb/client/cifspdu.h | 2 +- fs/smb/client/cifsproto.h | 4 +- fs/smb/client/inode.c | 51 ++++++++++++++++++-- fs/smb/client/readdir.c | 6 ++- fs/smb/client/smb1ops.c | 3 +- fs/smb/client/smb2inode.c | 2 +- fs/smb/client/smb2ops.c | 99 ++++++++++++++++++++------------------- 8 files changed, 115 insertions(+), 60 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 111daa4ff261..7558167f603c 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -191,7 +191,13 @@ struct cifs_open_info_data { bool reparse_point; bool symlink; }; - __u32 reparse_tag; + struct { + __u32 tag; + union { + struct reparse_data_buffer *buf; + struct reparse_posix_data *posix; + }; + } reparse; char *symlink_target; union { struct smb2_file_all_info fi; diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index 2a90134331a4..83ccc51a54d0 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -1509,7 +1509,7 @@ struct reparse_posix_data { __le16 ReparseDataLength; __u16 Reserved; __le64 InodeType; /* LNK, FIFO, CHR etc. */ - char PathBuffer[]; + __u8 DataBuffer[]; } __attribute__((packed)); struct cifs_quota_data { diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 8a739c10d634..c00f84420559 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -210,7 +210,7 @@ int cifs_get_inode_info(struct inode **inode, const char *full_path, const struct cifs_fid *fid); bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, - u32 tag); + struct cifs_open_info_data *data); extern int smb311_posix_get_inode_info(struct inode **pinode, const char *search_path, struct super_block *sb, unsigned int xid); extern int cifs_get_inode_info_unix(struct inode **pinode, @@ -667,7 +667,7 @@ char *extract_hostname(const char *unc); char *extract_sharename(const char *unc); int parse_reparse_point(struct reparse_data_buffer *buf, u32 plen, struct cifs_sb_info *cifs_sb, - bool unicode, char **target_path); + bool unicode, struct cifs_open_info_data *data); #ifdef CONFIG_CIFS_DFS_UPCALL static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses, diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index dd482de3dc3f..47f49be69ced 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -721,10 +721,51 @@ static void smb311_posix_info_to_fattr(struct cifs_fattr *fattr, fattr->cf_mode, fattr->cf_uniqueid, fattr->cf_nlink); } +static inline dev_t nfs_mkdev(struct reparse_posix_data *buf) +{ + u64 v = le64_to_cpu(*(__le64 *)buf->DataBuffer); + + return MKDEV(v >> 32, v & 0xffffffff); +} + bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, - u32 tag) + struct cifs_open_info_data *data) { + struct reparse_posix_data *buf = data->reparse.posix; + u32 tag = data->reparse.tag; + + if (tag == IO_REPARSE_TAG_NFS && buf) { + switch (le64_to_cpu(buf->InodeType)) { + case NFS_SPECFILE_CHR: + fattr->cf_mode |= S_IFCHR | cifs_sb->ctx->file_mode; + fattr->cf_dtype = DT_CHR; + fattr->cf_rdev = nfs_mkdev(buf); + break; + case NFS_SPECFILE_BLK: + fattr->cf_mode |= S_IFBLK | cifs_sb->ctx->file_mode; + fattr->cf_dtype = DT_BLK; + fattr->cf_rdev = nfs_mkdev(buf); + break; + case NFS_SPECFILE_FIFO: + fattr->cf_mode |= S_IFIFO | cifs_sb->ctx->file_mode; + fattr->cf_dtype = DT_FIFO; + break; + case NFS_SPECFILE_SOCK: + fattr->cf_mode |= S_IFSOCK | cifs_sb->ctx->file_mode; + fattr->cf_dtype = DT_SOCK; + break; + case NFS_SPECFILE_LNK: + fattr->cf_mode = S_IFLNK | cifs_sb->ctx->file_mode; + fattr->cf_dtype = DT_LNK; + break; + default: + WARN_ON_ONCE(1); + return false; + } + return true; + } + switch (tag) { case IO_REPARSE_TAG_LX_SYMLINK: fattr->cf_mode |= S_IFLNK | cifs_sb->ctx->file_mode; @@ -790,7 +831,7 @@ static void cifs_open_info_to_fattr(struct cifs_fattr *fattr, fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); if (cifs_open_data_reparse(data) && - cifs_reparse_point_to_fattr(cifs_sb, fattr, data->reparse_tag)) + cifs_reparse_point_to_fattr(cifs_sb, fattr, data)) goto out_reparse; if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { @@ -855,7 +896,7 @@ cifs_get_file_info(struct file *filp) data.adjust_tz = false; if (data.symlink_target) { data.symlink = true; - data.reparse_tag = IO_REPARSE_TAG_SYMLINK; + data.reparse.tag = IO_REPARSE_TAG_SYMLINK; } cifs_open_info_to_fattr(&fattr, &data, inode->i_sb); break; @@ -1024,7 +1065,7 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct kvec rsp_iov, *iov = NULL; int rsp_buftype = CIFS_NO_BUFFER; - u32 tag = data->reparse_tag; + u32 tag = data->reparse.tag; int rc = 0; if (!tag && server->ops->query_reparse_point) { @@ -1036,7 +1077,7 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, } rc = -EOPNOTSUPP; - switch ((data->reparse_tag = tag)) { + switch ((data->reparse.tag = tag)) { case 0: /* SMB1 symlink */ if (server->ops->query_symlink) { rc = server->ops->query_symlink(xid, tcon, diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index 47fc22de8d20..d30ea2005eb3 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -153,6 +153,10 @@ static bool reparse_file_needs_reval(const struct cifs_fattr *fattr) static void cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) { + struct cifs_open_info_data data = { + .reparse = { .tag = fattr->cf_cifstag, }, + }; + fattr->cf_uid = cifs_sb->ctx->linux_uid; fattr->cf_gid = cifs_sb->ctx->linux_gid; @@ -165,7 +169,7 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) * reasonably map some of them to directories vs. files vs. symlinks */ if ((fattr->cf_cifsattrs & ATTR_REPARSE) && - cifs_reparse_point_to_fattr(cifs_sb, fattr, fattr->cf_cifstag)) + cifs_reparse_point_to_fattr(cifs_sb, fattr, &data)) goto out_reparse; if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 0dd599004e04..64e25233e85d 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -1004,8 +1004,7 @@ static int cifs_parse_reparse_point(struct cifs_sb_info *cifs_sb, buf = (struct reparse_data_buffer *)((__u8 *)&io->hdr.Protocol + le32_to_cpu(io->DataOffset)); - return parse_reparse_point(buf, plen, cifs_sb, unicode, - &data->symlink_target); + return parse_reparse_point(buf, plen, cifs_sb, unicode, data); } static bool diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 0b89f7008ac0..c94940af5d4b 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -555,7 +555,7 @@ static int parse_create_response(struct cifs_open_info_data *data, break; } data->reparse_point = reparse_point; - data->reparse_tag = tag; + data->reparse.tag = tag; return rc; } diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 2955eaa51d4d..f01a929a7a3a 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -2866,89 +2866,95 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, return rc; } -static int -parse_reparse_posix(struct reparse_posix_data *symlink_buf, - u32 plen, char **target_path, - struct cifs_sb_info *cifs_sb) +/* See MS-FSCC 2.1.2.6 for the 'NFS' style reparse tags */ +static int parse_reparse_posix(struct reparse_posix_data *buf, + struct cifs_sb_info *cifs_sb, + struct cifs_open_info_data *data) { unsigned int len; + u64 type; - /* See MS-FSCC 2.1.2.6 for the 'NFS' style reparse tags */ - len = le16_to_cpu(symlink_buf->ReparseDataLength); - - if (le64_to_cpu(symlink_buf->InodeType) != NFS_SPECFILE_LNK) { - cifs_dbg(VFS, "%lld not a supported symlink type\n", - le64_to_cpu(symlink_buf->InodeType)); + switch ((type = le64_to_cpu(buf->InodeType))) { + case NFS_SPECFILE_LNK: + len = le16_to_cpu(buf->ReparseDataLength); + data->symlink_target = cifs_strndup_from_utf16(buf->DataBuffer, + len, true, + cifs_sb->local_nls); + if (!data->symlink_target) + return -ENOMEM; + convert_delimiter(data->symlink_target, '/'); + cifs_dbg(FYI, "%s: target path: %s\n", + __func__, data->symlink_target); + break; + case NFS_SPECFILE_CHR: + case NFS_SPECFILE_BLK: + case NFS_SPECFILE_FIFO: + case NFS_SPECFILE_SOCK: + break; + default: + cifs_dbg(VFS, "%s: unhandled inode type: 0x%llx\n", + __func__, type); return -EOPNOTSUPP; } - - *target_path = cifs_strndup_from_utf16( - symlink_buf->PathBuffer, - len, true, cifs_sb->local_nls); - if (!(*target_path)) - return -ENOMEM; - - convert_delimiter(*target_path, '/'); - cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path); - return 0; } static int parse_reparse_symlink(struct reparse_symlink_data_buffer *sym, - u32 plen, bool unicode, char **target_path, - struct cifs_sb_info *cifs_sb) + u32 plen, bool unicode, + struct cifs_sb_info *cifs_sb, + struct cifs_open_info_data *data) { - unsigned int sub_len; - unsigned int sub_offset; + unsigned int len; + unsigned int offs; /* We handle Symbolic Link reparse tag here. See: MS-FSCC 2.1.2.4 */ - sub_offset = le16_to_cpu(sym->SubstituteNameOffset); - sub_len = le16_to_cpu(sym->SubstituteNameLength); - if (sub_offset + 20 > plen || - sub_offset + sub_len + 20 > plen) { + offs = le16_to_cpu(sym->SubstituteNameOffset); + len = le16_to_cpu(sym->SubstituteNameLength); + if (offs + 20 > plen || offs + len + 20 > plen) { cifs_dbg(VFS, "srv returned malformed symlink buffer\n"); return -EIO; } - *target_path = cifs_strndup_from_utf16(sym->PathBuffer + sub_offset, - sub_len, unicode, - cifs_sb->local_nls); - if (!(*target_path)) + data->symlink_target = cifs_strndup_from_utf16(sym->PathBuffer + offs, + len, unicode, + cifs_sb->local_nls); + if (!data->symlink_target) return -ENOMEM; - convert_delimiter(*target_path, '/'); - cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path); + convert_delimiter(data->symlink_target, '/'); + cifs_dbg(FYI, "%s: target path: %s\n", __func__, data->symlink_target); return 0; } int parse_reparse_point(struct reparse_data_buffer *buf, u32 plen, struct cifs_sb_info *cifs_sb, - bool unicode, char **target_path) + bool unicode, struct cifs_open_info_data *data) { if (plen < sizeof(*buf)) { - cifs_dbg(VFS, "reparse buffer is too small. Must be at least 8 bytes but was %d\n", - plen); + cifs_dbg(VFS, "%s: reparse buffer is too small. Must be at least 8 bytes but was %d\n", + __func__, plen); return -EIO; } if (plen < le16_to_cpu(buf->ReparseDataLength) + sizeof(*buf)) { - cifs_dbg(VFS, "srv returned invalid reparse buf length: %d\n", - plen); + cifs_dbg(VFS, "%s: invalid reparse buf length: %d\n", + __func__, plen); return -EIO; } + data->reparse.buf = buf; + /* See MS-FSCC 2.1.2 */ switch (le32_to_cpu(buf->ReparseTag)) { case IO_REPARSE_TAG_NFS: - return parse_reparse_posix( - (struct reparse_posix_data *)buf, - plen, target_path, cifs_sb); + return parse_reparse_posix((struct reparse_posix_data *)buf, + cifs_sb, data); case IO_REPARSE_TAG_SYMLINK: return parse_reparse_symlink( (struct reparse_symlink_data_buffer *)buf, - plen, unicode, target_path, cifs_sb); + plen, unicode, cifs_sb, data); case IO_REPARSE_TAG_LX_SYMLINK: case IO_REPARSE_TAG_AF_UNIX: case IO_REPARSE_TAG_LX_FIFO: @@ -2956,8 +2962,8 @@ int parse_reparse_point(struct reparse_data_buffer *buf, case IO_REPARSE_TAG_LX_BLK: return 0; default: - cifs_dbg(VFS, "srv returned unknown symlink buffer tag:0x%08x\n", - le32_to_cpu(buf->ReparseTag)); + cifs_dbg(VFS, "%s: unhandled reparse tag: 0x%08x\n", + __func__, le32_to_cpu(buf->ReparseTag)); return -EOPNOTSUPP; } } @@ -2972,8 +2978,7 @@ static int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb, buf = (struct reparse_data_buffer *)((u8 *)io + le32_to_cpu(io->OutputOffset)); - return parse_reparse_point(buf, plen, cifs_sb, - true, &data->symlink_target); + return parse_reparse_point(buf, plen, cifs_sb, true, data); } static int smb2_query_reparse_point(const unsigned int xid, From b0348e459c836abdb0f4b967e006d15c77cf1c87 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 21 Nov 2023 20:12:55 -0300 Subject: [PATCH 219/234] smb: client: introduce cifs_sfu_make_node() Remove duplicate code and add new helper for creating special files in SFU (Services for UNIX) format that can be shared by SMB1+ code. Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/smb/client/cifsproto.h | 3 ++ fs/smb/client/smb1ops.c | 80 ++++------------------------------- fs/smb/client/smb2ops.c | 89 ++++++++++++++++++--------------------- 3 files changed, 52 insertions(+), 120 deletions(-) diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index c00f84420559..46feaa0880bd 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -668,6 +668,9 @@ char *extract_sharename(const char *unc); int parse_reparse_point(struct reparse_data_buffer *buf, u32 plen, struct cifs_sb_info *cifs_sb, bool unicode, struct cifs_open_info_data *data); +int cifs_sfu_make_node(unsigned int xid, struct inode *inode, + struct dentry *dentry, struct cifs_tcon *tcon, + const char *full_path, umode_t mode, dev_t dev); #ifdef CONFIG_CIFS_DFS_UPCALL static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses, diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 64e25233e85d..a9eaba8083b0 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -1041,15 +1041,7 @@ cifs_make_node(unsigned int xid, struct inode *inode, { struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct inode *newinode = NULL; - int rc = -EPERM; - struct cifs_open_info_data buf = {}; - struct cifs_io_parms io_parms; - __u32 oplock = 0; - struct cifs_fid fid; - struct cifs_open_parms oparms; - unsigned int bytes_written; - struct win_dev *pdev; - struct kvec iov[2]; + int rc; if (tcon->unix_ext) { /* @@ -1083,74 +1075,18 @@ cifs_make_node(unsigned int xid, struct inode *inode, d_instantiate(dentry, newinode); return rc; } - /* - * SMB1 SFU emulation: should work with all servers, but only - * support block and char device (no socket & fifo) + * Check if mounted with mount parm 'sfu' mount parm. + * SFU emulation should work with all servers, but only + * supports block and char device (no socket & fifo), + * and was used by default in earlier versions of Windows */ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) - return rc; - - if (!S_ISCHR(mode) && !S_ISBLK(mode)) - return rc; - - cifs_dbg(FYI, "sfu compat create special file\n"); - - oparms = (struct cifs_open_parms) { - .tcon = tcon, - .cifs_sb = cifs_sb, - .desired_access = GENERIC_WRITE, - .create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR | - CREATE_OPTION_SPECIAL), - .disposition = FILE_CREATE, - .path = full_path, - .fid = &fid, - }; - - if (tcon->ses->server->oplocks) - oplock = REQ_OPLOCK; - else - oplock = 0; - rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, &buf); - if (rc) - return rc; - - /* - * BB Do not bother to decode buf since no local inode yet to put - * timestamps in, but we can reuse it safely. - */ - - pdev = (struct win_dev *)&buf.fi; - io_parms.pid = current->tgid; - io_parms.tcon = tcon; - io_parms.offset = 0; - io_parms.length = sizeof(struct win_dev); - iov[1].iov_base = &buf.fi; - iov[1].iov_len = sizeof(struct win_dev); - if (S_ISCHR(mode)) { - memcpy(pdev->type, "IntxCHR", 8); - pdev->major = cpu_to_le64(MAJOR(dev)); - pdev->minor = cpu_to_le64(MINOR(dev)); - rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms, - &bytes_written, iov, 1); - } else if (S_ISBLK(mode)) { - memcpy(pdev->type, "IntxBLK", 8); - pdev->major = cpu_to_le64(MAJOR(dev)); - pdev->minor = cpu_to_le64(MINOR(dev)); - rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms, - &bytes_written, iov, 1); - } - tcon->ses->server->ops->close(xid, tcon, &fid); - d_drop(dentry); - - /* FIXME: add code here to set EAs */ - - cifs_free_open_info(&buf); - return rc; + return -EPERM; + return cifs_sfu_make_node(xid, inode, dentry, tcon, + full_path, mode, dev); } - - struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index f01a929a7a3a..82ab62fd0040 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -5068,41 +5068,24 @@ smb2_next_header(char *buf) return le32_to_cpu(hdr->NextCommand); } -static int -smb2_make_node(unsigned int xid, struct inode *inode, - struct dentry *dentry, struct cifs_tcon *tcon, - const char *full_path, umode_t mode, dev_t dev) +int cifs_sfu_make_node(unsigned int xid, struct inode *inode, + struct dentry *dentry, struct cifs_tcon *tcon, + const char *full_path, umode_t mode, dev_t dev) { - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); - int rc = -EPERM; struct cifs_open_info_data buf = {}; - struct cifs_io_parms io_parms = {0}; - __u32 oplock = 0; - struct cifs_fid fid; + struct TCP_Server_Info *server = tcon->ses->server; struct cifs_open_parms oparms; + struct cifs_io_parms io_parms = {}; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_fid fid; unsigned int bytes_written; struct win_dev *pdev; struct kvec iov[2]; - - /* - * Check if mounted with mount parm 'sfu' mount parm. - * SFU emulation should work with all servers, but only - * supports block and char device (no socket & fifo), - * and was used by default in earlier versions of Windows - */ - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) - return rc; - - /* - * TODO: Add ability to create instead via reparse point. Windows (e.g. - * their current NFS server) uses this approach to expose special files - * over SMB2/SMB3 and Samba will do this with SMB3.1.1 POSIX Extensions - */ + __u32 oplock = server->oplocks ? REQ_OPLOCK : 0; + int rc; if (!S_ISCHR(mode) && !S_ISBLK(mode) && !S_ISFIFO(mode)) - return rc; - - cifs_dbg(FYI, "sfu compat create special file\n"); + return -EPERM; oparms = (struct cifs_open_parms) { .tcon = tcon, @@ -5115,11 +5098,7 @@ smb2_make_node(unsigned int xid, struct inode *inode, .fid = &fid, }; - if (tcon->ses->server->oplocks) - oplock = REQ_OPLOCK; - else - oplock = 0; - rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, &buf); + rc = server->ops->open(xid, &oparms, &oplock, &buf); if (rc) return rc; @@ -5127,42 +5106,56 @@ smb2_make_node(unsigned int xid, struct inode *inode, * BB Do not bother to decode buf since no local inode yet to put * timestamps in, but we can reuse it safely. */ - pdev = (struct win_dev *)&buf.fi; io_parms.pid = current->tgid; io_parms.tcon = tcon; - io_parms.offset = 0; - io_parms.length = sizeof(struct win_dev); - iov[1].iov_base = &buf.fi; - iov[1].iov_len = sizeof(struct win_dev); + io_parms.length = sizeof(*pdev); + iov[1].iov_base = pdev; + iov[1].iov_len = sizeof(*pdev); if (S_ISCHR(mode)) { memcpy(pdev->type, "IntxCHR", 8); pdev->major = cpu_to_le64(MAJOR(dev)); pdev->minor = cpu_to_le64(MINOR(dev)); - rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms, - &bytes_written, iov, 1); } else if (S_ISBLK(mode)) { memcpy(pdev->type, "IntxBLK", 8); pdev->major = cpu_to_le64(MAJOR(dev)); pdev->minor = cpu_to_le64(MINOR(dev)); - rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms, - &bytes_written, iov, 1); } else if (S_ISFIFO(mode)) { memcpy(pdev->type, "LnxFIFO", 8); - pdev->major = 0; - pdev->minor = 0; - rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms, - &bytes_written, iov, 1); } - tcon->ses->server->ops->close(xid, tcon, &fid); + + rc = server->ops->sync_write(xid, &fid, &io_parms, + &bytes_written, iov, 1); + server->ops->close(xid, tcon, &fid); d_drop(dentry); - /* FIXME: add code here to set EAs */ - cifs_free_open_info(&buf); return rc; } +static int smb2_make_node(unsigned int xid, struct inode *inode, + struct dentry *dentry, struct cifs_tcon *tcon, + const char *full_path, umode_t mode, dev_t dev) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + + /* + * Check if mounted with mount parm 'sfu' mount parm. + * SFU emulation should work with all servers, but only + * supports block and char device (no socket & fifo), + * and was used by default in earlier versions of Windows + */ + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) + return -EPERM; + /* + * TODO: Add ability to create instead via reparse point. Windows (e.g. + * their current NFS server) uses this approach to expose special files + * over SMB2/SMB3 and Samba will do this with SMB3.1.1 POSIX Extensions + */ + return cifs_sfu_make_node(xid, inode, dentry, tcon, + full_path, mode, dev); +} + #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY struct smb_version_operations smb20_operations = { .compare_fids = smb2_compare_fids, From c0a8574204054effad6ac83cc75c02576e2985fe Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 19 Nov 2023 14:32:34 +0900 Subject: [PATCH 220/234] arm64: add dependency between vmlinuz.efi and Image A common issue in Makefile is a race in parallel building. You need to be careful to prevent multiple threads from writing to the same file simultaneously. Commit 3939f3345050 ("ARM: 8418/1: add boot image dependencies to not generate invalid images") addressed such a bad scenario. A similar symptom occurs with the following command: $ make -j$(nproc) ARCH=arm64 Image vmlinuz.efi [ snip ] SORTTAB vmlinux OBJCOPY arch/arm64/boot/Image OBJCOPY arch/arm64/boot/Image AS arch/arm64/boot/zboot-header.o PAD arch/arm64/boot/vmlinux.bin GZIP arch/arm64/boot/vmlinuz OBJCOPY arch/arm64/boot/vmlinuz.o LD arch/arm64/boot/vmlinuz.efi.elf OBJCOPY arch/arm64/boot/vmlinuz.efi The log "OBJCOPY arch/arm64/boot/Image" is displayed twice. It indicates that two threads simultaneously enter arch/arm64/boot/ and write to arch/arm64/boot/Image. It occasionally leads to a build failure: $ make -j$(nproc) ARCH=arm64 Image vmlinuz.efi [ snip ] SORTTAB vmlinux OBJCOPY arch/arm64/boot/Image PAD arch/arm64/boot/vmlinux.bin truncate: Invalid number: 'arch/arm64/boot/vmlinux.bin' make[2]: *** [drivers/firmware/efi/libstub/Makefile.zboot:13: arch/arm64/boot/vmlinux.bin] Error 1 make[2]: *** Deleting file 'arch/arm64/boot/vmlinux.bin' make[1]: *** [arch/arm64/Makefile:163: vmlinuz.efi] Error 2 make[1]: *** Waiting for unfinished jobs.... make: *** [Makefile:234: __sub-make] Error 2 vmlinuz.efi depends on Image, but such a dependency is not specified in arch/arm64/Makefile. Signed-off-by: Masahiro Yamada Acked-by: Ard Biesheuvel Reviewed-by: SImon Glass Link: https://lore.kernel.org/r/20231119053234.2367621-1-masahiroy@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 4bd85cc0d32b..9a2d3723cd0f 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -158,7 +158,7 @@ endif all: $(notdir $(KBUILD_IMAGE)) - +vmlinuz.efi: Image Image vmlinuz.efi: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ From bca4104b00fec60be330cd32818dd5c70db3d469 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 21 Nov 2023 12:41:26 +0100 Subject: [PATCH 221/234] lockdep: Fix block chain corruption Kent reported an occasional KASAN splat in lockdep. Mark then noted: > I suspect the dodgy access is to chain_block_buckets[-1], which hits the last 4 > bytes of the redzone and gets (incorrectly/misleadingly) attributed to > nr_large_chain_blocks. That would mean @size == 0, at which point size_to_bucket() returns -1 and the above happens. alloc_chain_hlocks() has 'size - req', for the first with the precondition 'size >= rq', which allows the 0. This code is trying to split a block, del_chain_block() takes what we need, and add_chain_block() puts back the remainder, except in the above case the remainder is 0 sized and things go sideways. Fixes: 810507fe6fd5 ("locking/lockdep: Reuse freed chain_hlocks entries") Reported-by: Kent Overstreet Signed-off-by: Peter Zijlstra (Intel) Tested-by: Kent Overstreet Link: https://lkml.kernel.org/r/20231121114126.GH8262@noisy.programming.kicks-ass.net --- kernel/locking/lockdep.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index e85b5ad3e206..151bd3de5936 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3497,7 +3497,8 @@ static int alloc_chain_hlocks(int req) size = chain_block_size(curr); if (likely(size >= req)) { del_chain_block(0, size, chain_block_next(curr)); - add_chain_block(curr + req, size - req); + if (size > req) + add_chain_block(curr + req, size - req); return curr; } } From 0167236e7d66c5e1e85d902a6abc2529b7544539 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 26 Oct 2023 01:25:07 +0100 Subject: [PATCH 222/234] afs: Return ENOENT if no cell DNS record can be found Make AFS return error ENOENT if no cell SRV or AFSDB DNS record (or cellservdb config file record) can be found rather than returning EDESTADDRREQ. Also add cell name lookup info to the cursor dump. Fixes: d5c32c89b208 ("afs: Fix cell DNS lookup") Reported-by: Markus Suvanto Link: https://bugzilla.kernel.org/show_bug.cgi?id=216637 Signed-off-by: David Howells Reviewed-by: Marc Dionne cc: linux-afs@lists.infradead.org --- fs/afs/vl_rotate.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c index 488e58490b16..eb415ce56360 100644 --- a/fs/afs/vl_rotate.c +++ b/fs/afs/vl_rotate.c @@ -58,6 +58,12 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) } /* Status load is ordered after lookup counter load */ + if (cell->dns_status == DNS_LOOKUP_GOT_NOT_FOUND) { + pr_warn("No record of cell %s\n", cell->name); + vc->error = -ENOENT; + return false; + } + if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { vc->error = -EDESTADDRREQ; return false; @@ -285,6 +291,7 @@ failed: */ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) { + struct afs_cell *cell = vc->cell; static int count; int i; @@ -294,6 +301,9 @@ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) rcu_read_lock(); pr_notice("EDESTADDR occurred\n"); + pr_notice("CELL: %s err=%d\n", cell->name, cell->error); + pr_notice("DNS: src=%u st=%u lc=%x\n", + cell->dns_source, cell->dns_status, cell->dns_lookup_count); pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n", vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error); From b590eb41be766c5a63acc7e8896a042f7a4e8293 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 1 Nov 2023 22:03:28 +0000 Subject: [PATCH 223/234] afs: Fix file locking on R/O volumes to operate in local mode AFS doesn't really do locking on R/O volumes as fileservers don't maintain state with each other and thus a lock on a R/O volume file on one fileserver will not be be visible to someone looking at the same file on another fileserver. Further, the server may return an error if you try it. Fix this by doing what other AFS clients do and handle filelocking on R/O volume files entirely within the client and don't touch the server. Fixes: 6c6c1d63c243 ("afs: Provide mount-time configurable byte-range file locking emulation") Signed-off-by: David Howells Reviewed-by: Marc Dionne cc: linux-afs@lists.infradead.org --- fs/afs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/afs/super.c b/fs/afs/super.c index 95d713074dc8..e95fb4cb4fcd 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -407,6 +407,8 @@ static int afs_validate_fc(struct fs_context *fc) return PTR_ERR(volume); ctx->volume = volume; + if (volume->type != AFSVL_RWVOL) + ctx->flock_mode = afs_flock_mode_local; } return 0; From 68516f60c1d8b0a71e516d630f66b99cb50e0150 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2023 16:24:00 +0000 Subject: [PATCH 224/234] afs: Mark a superblock for an R/O or Backup volume as SB_RDONLY Mark a superblock that is for for an R/O or Backup volume as SB_RDONLY when mounting it. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- fs/afs/super.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/afs/super.c b/fs/afs/super.c index e95fb4cb4fcd..a01a0fb2cdbb 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -407,8 +407,10 @@ static int afs_validate_fc(struct fs_context *fc) return PTR_ERR(volume); ctx->volume = volume; - if (volume->type != AFSVL_RWVOL) + if (volume->type != AFSVL_RWVOL) { ctx->flock_mode = afs_flock_mode_local; + fc->sb_flags |= SB_RDONLY; + } } return 0; From e11d4cccd094a7cd4696c8c42e672c76c092dad5 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 20 Nov 2023 15:37:50 +0100 Subject: [PATCH 225/234] parisc: Mark ex_table entries 32-bit aligned in assembly.h Add an align statement to tell the linker that all ex_table entries and as such the whole ex_table section should be 32-bit aligned in vmlinux and modules. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ --- arch/parisc/include/asm/assembly.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index 75677b526b2b..74d17d7e759d 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -574,6 +574,7 @@ */ #define ASM_EXCEPTIONTABLE_ENTRY(fault_addr, except_addr) \ .section __ex_table,"aw" ! \ + .align 4 ! \ .word (fault_addr - .), (except_addr - .) ! \ .previous From a80aeb86542a50aa8521729ea4cc731ee7174f03 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 20 Nov 2023 15:39:03 +0100 Subject: [PATCH 226/234] parisc: Mark ex_table entries 32-bit aligned in uaccess.h Add an align statement to tell the linker that all ex_table entries and as such the whole ex_table section should be 32-bit aligned in vmlinux and modules. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ --- arch/parisc/include/asm/uaccess.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 2bf660eabe42..4165079898d9 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -41,6 +41,7 @@ struct exception_table_entry { #define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\ ".section __ex_table,\"aw\"\n" \ + ".align 4\n" \ ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \ ".previous\n" From 33f806da2df68606f77d7b892cd1298ba3d463e8 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 20 Nov 2023 23:10:20 +0100 Subject: [PATCH 227/234] parisc: Mark altinstructions read-only and 32-bit aligned Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ --- arch/parisc/include/asm/alternative.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/parisc/include/asm/alternative.h b/arch/parisc/include/asm/alternative.h index 1ed45fd085d3..1eb488f25b83 100644 --- a/arch/parisc/include/asm/alternative.h +++ b/arch/parisc/include/asm/alternative.h @@ -34,7 +34,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end, /* Alternative SMP implementation. */ #define ALTERNATIVE(cond, replacement) "!0:" \ - ".section .altinstructions, \"aw\" !" \ + ".section .altinstructions, \"a\" !" \ + ".align 4 !" \ ".word (0b-4-.) !" \ ".hword 1, " __stringify(cond) " !" \ ".word " __stringify(replacement) " !" \ @@ -44,7 +45,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end, /* to replace one single instructions by a new instruction */ #define ALTERNATIVE(from, to, cond, replacement)\ - .section .altinstructions, "aw" ! \ + .section .altinstructions, "a" ! \ + .align 4 ! \ .word (from - .) ! \ .hword (to - from)/4, cond ! \ .word replacement ! \ @@ -52,7 +54,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end, /* to replace multiple instructions by new code */ #define ALTERNATIVE_CODE(from, num_instructions, cond, new_instr_ptr)\ - .section .altinstructions, "aw" ! \ + .section .altinstructions, "a" ! \ + .align 4 ! \ .word (from - .) ! \ .hword -num_instructions, cond ! \ .word (new_instr_ptr - .) ! \ From 07eecff8ae78df7f28800484d31337e1f9bfca3a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 20 Nov 2023 23:14:39 +0100 Subject: [PATCH 228/234] parisc: Mark jump_table naturally aligned The jump_table stores two 32-bit words and one 32- (on 32-bit kernel) or one 64-bit word (on 64-bit kernel). Ensure that the last word is always 64-bit aligned on a 64-bit kernel by aligning the whole structure on sizeof(long). Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ --- arch/parisc/include/asm/jump_label.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/jump_label.h b/arch/parisc/include/asm/jump_label.h index af2a598bc0f8..94428798b6aa 100644 --- a/arch/parisc/include/asm/jump_label.h +++ b/arch/parisc/include/asm/jump_label.h @@ -15,10 +15,12 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran asm_volatile_goto("1:\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" + ".align %1\n\t" ".word 1b - ., %l[l_yes] - .\n\t" __stringify(ASM_ULONG_INSN) " %c0 - .\n\t" ".popsection\n\t" - : : "i" (&((char *)key)[branch]) : : l_yes); + : : "i" (&((char *)key)[branch]), "i" (sizeof(long)) + : : l_yes); return false; l_yes: @@ -30,10 +32,12 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool asm_volatile_goto("1:\n\t" "b,n %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" + ".align %1\n\t" ".word 1b - ., %l[l_yes] - .\n\t" __stringify(ASM_ULONG_INSN) " %c0 - .\n\t" ".popsection\n\t" - : : "i" (&((char *)key)[branch]) : : l_yes); + : : "i" (&((char *)key)[branch]), "i" (sizeof(long)) + : : l_yes); return false; l_yes: From b28fc0d8739c03e7b6c44914a9d00d4c6dddc0ea Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 25 Nov 2023 09:11:56 +0100 Subject: [PATCH 229/234] parisc: Mark lock_aligned variables 16-byte aligned on SMP On parisc we need 16-byte alignment for variables which are used for locking. Mark the __lock_aligned attribute acordingly so that the .data..lock_aligned section will get that alignment in the generated object files. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ --- arch/parisc/include/asm/ldcw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h index ee9e071859b2..47ebc4c91eaf 100644 --- a/arch/parisc/include/asm/ldcw.h +++ b/arch/parisc/include/asm/ldcw.h @@ -55,7 +55,7 @@ }) #ifdef CONFIG_SMP -# define __lock_aligned __section(".data..lock_aligned") +# define __lock_aligned __section(".data..lock_aligned") __aligned(16) #endif #endif /* __PARISC_LDCW_H */ From c9fcb2b65c2849e8ff3be23fd8828312fb68dc19 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 25 Nov 2023 09:16:02 +0100 Subject: [PATCH 230/234] parisc: Ensure 32-bit alignment on parisc unwind section Make sure the .PARISC.unwind section will be 32-bit aligned. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ --- arch/parisc/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 58694d1989c2..548051b0b4af 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -130,6 +130,7 @@ SECTIONS RO_DATA(8) /* unwind info */ + . = ALIGN(4); .PARISC.unwind : { __start___unwind = .; *(.PARISC.unwind) From fe76a1349f235969381832c83d703bc911021eb6 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 20 Nov 2023 23:30:49 +0100 Subject: [PATCH 231/234] parisc: Use natural CPU alignment for bug_table Make sure that the __bug_table section gets 32- or 64-bit aligned, depending if a 32- or 64-bit kernel is being built. Mark it non-writeable and use .blockz instead of the .org assembler directive to pad the struct. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ --- arch/parisc/include/asm/bug.h | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h index 4b6d60b94124..b9cad0bb4461 100644 --- a/arch/parisc/include/asm/bug.h +++ b/arch/parisc/include/asm/bug.h @@ -28,13 +28,15 @@ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ - "\t.pushsection __bug_table,\"aw\"\n" \ + "\t.pushsection __bug_table,\"a\"\n" \ + "\t.align %4\n" \ "2:\t" ASM_WORD_INSN "1b, %c0\n" \ - "\t.short %c1, %c2\n" \ - "\t.org 2b+%c3\n" \ + "\t.short %1, %2\n" \ + "\t.blockz %3-2*%4-2*2\n" \ "\t.popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ - "i" (0), "i" (sizeof(struct bug_entry)) ); \ + "i" (0), "i" (sizeof(struct bug_entry)), \ + "i" (sizeof(long)) ); \ unreachable(); \ } while(0) @@ -51,27 +53,31 @@ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ - "\t.pushsection __bug_table,\"aw\"\n" \ + "\t.pushsection __bug_table,\"a\"\n" \ + "\t.align %4\n" \ "2:\t" ASM_WORD_INSN "1b, %c0\n" \ - "\t.short %c1, %c2\n" \ - "\t.org 2b+%c3\n" \ + "\t.short %1, %2\n" \ + "\t.blockz %3-2*%4-2*2\n" \ "\t.popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ "i" (BUGFLAG_WARNING|(flags)), \ - "i" (sizeof(struct bug_entry)) ); \ + "i" (sizeof(struct bug_entry)), \ + "i" (sizeof(long)) ); \ } while(0) #else #define __WARN_FLAGS(flags) \ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ - "\t.pushsection __bug_table,\"aw\"\n" \ + "\t.pushsection __bug_table,\"a\"\n" \ + "\t.align %2\n" \ "2:\t" ASM_WORD_INSN "1b\n" \ - "\t.short %c0\n" \ - "\t.org 2b+%c1\n" \ + "\t.short %0\n" \ + "\t.blockz %1-%2-2\n" \ "\t.popsection" \ : : "i" (BUGFLAG_WARNING|(flags)), \ - "i" (sizeof(struct bug_entry)) ); \ + "i" (sizeof(struct bug_entry)), \ + "i" (sizeof(long)) ); \ } while(0) #endif From e5f3e299a2b1e9c3ece24a38adfc089aef307e8a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 23 Nov 2023 20:28:27 +0100 Subject: [PATCH 232/234] parisc: Drop the HP-UX ENOSYM and EREMOTERELEASE error codes Those return codes are only defined for the parisc architecture and are leftovers from when we wanted to be HP-UX compatible. They are not returned by any Linux kernel syscall but do trigger problems with the glibc strerrorname_np() and strerror() functions as reported in glibc issue #31080. There is no need to keep them, so simply remove them. Signed-off-by: Helge Deller Reported-by: Bruno Haible Closes: https://sourceware.org/bugzilla/show_bug.cgi?id=31080 Cc: stable@vger.kernel.org --- arch/parisc/include/uapi/asm/errno.h | 2 -- lib/errname.c | 6 ------ tools/arch/parisc/include/uapi/asm/errno.h | 2 -- 3 files changed, 10 deletions(-) diff --git a/arch/parisc/include/uapi/asm/errno.h b/arch/parisc/include/uapi/asm/errno.h index 87245c584784..8d94739d75c6 100644 --- a/arch/parisc/include/uapi/asm/errno.h +++ b/arch/parisc/include/uapi/asm/errno.h @@ -75,7 +75,6 @@ /* We now return you to your regularly scheduled HPUX. */ -#define ENOSYM 215 /* symbol does not exist in executable */ #define ENOTSOCK 216 /* Socket operation on non-socket */ #define EDESTADDRREQ 217 /* Destination address required */ #define EMSGSIZE 218 /* Message too long */ @@ -101,7 +100,6 @@ #define ETIMEDOUT 238 /* Connection timed out */ #define ECONNREFUSED 239 /* Connection refused */ #define EREFUSED ECONNREFUSED /* for HP's NFS apparently */ -#define EREMOTERELEASE 240 /* Remote peer released connection */ #define EHOSTDOWN 241 /* Host is down */ #define EHOSTUNREACH 242 /* No route to host */ diff --git a/lib/errname.c b/lib/errname.c index dd1b998552cd..4f9112b38f3a 100644 --- a/lib/errname.c +++ b/lib/errname.c @@ -111,9 +111,6 @@ static const char *names_0[] = { E(ENOSPC), E(ENOSR), E(ENOSTR), -#ifdef ENOSYM - E(ENOSYM), -#endif E(ENOSYS), E(ENOTBLK), E(ENOTCONN), @@ -144,9 +141,6 @@ static const char *names_0[] = { #endif E(EREMOTE), E(EREMOTEIO), -#ifdef EREMOTERELEASE - E(EREMOTERELEASE), -#endif E(ERESTART), E(ERFKILL), E(EROFS), diff --git a/tools/arch/parisc/include/uapi/asm/errno.h b/tools/arch/parisc/include/uapi/asm/errno.h index 87245c584784..8d94739d75c6 100644 --- a/tools/arch/parisc/include/uapi/asm/errno.h +++ b/tools/arch/parisc/include/uapi/asm/errno.h @@ -75,7 +75,6 @@ /* We now return you to your regularly scheduled HPUX. */ -#define ENOSYM 215 /* symbol does not exist in executable */ #define ENOTSOCK 216 /* Socket operation on non-socket */ #define EDESTADDRREQ 217 /* Destination address required */ #define EMSGSIZE 218 /* Message too long */ @@ -101,7 +100,6 @@ #define ETIMEDOUT 238 /* Connection timed out */ #define ECONNREFUSED 239 /* Connection refused */ #define EREFUSED ECONNREFUSED /* for HP's NFS apparently */ -#define EREMOTERELEASE 240 /* Remote peer released connection */ #define EHOSTDOWN 241 /* Host is down */ #define EHOSTUNREACH 242 /* No route to host */ From 43266838515d30dc0c45d5c7e6e7edacee6cce92 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 23 Nov 2023 21:57:19 +0100 Subject: [PATCH 233/234] parisc: Reduce size of the bug_table on 64-bit kernel by half Enable GENERIC_BUG_RELATIVE_POINTERS which will store 32-bit relative offsets to the bug address and the source file name instead of 64-bit absolute addresses. This effectively reduces the size of the bug_table[] array by half on 64-bit kernels. Signed-off-by: Helge Deller --- arch/parisc/Kconfig | 7 +++++-- arch/parisc/include/asm/bug.h | 34 +++++++++++++++++----------------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index a7c9c0e69e5a..d14ccc948a29 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -115,9 +115,12 @@ config ARCH_HAS_ILOG2_U64 default n config GENERIC_BUG - bool - default y + def_bool y depends on BUG + select GENERIC_BUG_RELATIVE_POINTERS if 64BIT + +config GENERIC_BUG_RELATIVE_POINTERS + bool config GENERIC_HWEIGHT bool diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h index b9cad0bb4461..1641ff9a8b83 100644 --- a/arch/parisc/include/asm/bug.h +++ b/arch/parisc/include/asm/bug.h @@ -17,26 +17,27 @@ #define PARISC_BUG_BREAK_ASM "break 0x1f, 0x1fff" #define PARISC_BUG_BREAK_INSN 0x03ffe01f /* PARISC_BUG_BREAK_ASM */ -#if defined(CONFIG_64BIT) -#define ASM_WORD_INSN ".dword\t" +#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS +# define __BUG_REL(val) ".word " __stringify(val) " - ." #else -#define ASM_WORD_INSN ".word\t" +# define __BUG_REL(val) ".word " __stringify(val) #endif + #ifdef CONFIG_DEBUG_BUGVERBOSE #define BUG() \ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ "\t.pushsection __bug_table,\"a\"\n" \ - "\t.align %4\n" \ - "2:\t" ASM_WORD_INSN "1b, %c0\n" \ + "\t.align 4\n" \ + "2:\t" __BUG_REL(1b) "\n" \ + "\t" __BUG_REL(%c0) "\n" \ "\t.short %1, %2\n" \ - "\t.blockz %3-2*%4-2*2\n" \ + "\t.blockz %3-2*4-2*2\n" \ "\t.popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ - "i" (0), "i" (sizeof(struct bug_entry)), \ - "i" (sizeof(long)) ); \ + "i" (0), "i" (sizeof(struct bug_entry)) ); \ unreachable(); \ } while(0) @@ -54,15 +55,15 @@ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ "\t.pushsection __bug_table,\"a\"\n" \ - "\t.align %4\n" \ - "2:\t" ASM_WORD_INSN "1b, %c0\n" \ + "\t.align 4\n" \ + "2:\t" __BUG_REL(1b) "\n" \ + "\t" __BUG_REL(%c0) "\n" \ "\t.short %1, %2\n" \ - "\t.blockz %3-2*%4-2*2\n" \ + "\t.blockz %3-2*4-2*2\n" \ "\t.popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ "i" (BUGFLAG_WARNING|(flags)), \ - "i" (sizeof(struct bug_entry)), \ - "i" (sizeof(long)) ); \ + "i" (sizeof(struct bug_entry)) ); \ } while(0) #else #define __WARN_FLAGS(flags) \ @@ -71,13 +72,12 @@ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ "\t.pushsection __bug_table,\"a\"\n" \ "\t.align %2\n" \ - "2:\t" ASM_WORD_INSN "1b\n" \ + "2:\t" __BUG_REL(1b) "\n" \ "\t.short %0\n" \ - "\t.blockz %1-%2-2\n" \ + "\t.blockz %1-4-2\n" \ "\t.popsection" \ : : "i" (BUGFLAG_WARNING|(flags)), \ - "i" (sizeof(struct bug_entry)), \ - "i" (sizeof(long)) ); \ + "i" (sizeof(struct bug_entry)) ); \ } while(0) #endif From 2cc14f52aeb78ce3f29677c2de1f06c0e91471ab Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 26 Nov 2023 19:59:33 -0800 Subject: [PATCH 234/234] Linux 6.7-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 724c79bebe72..99db546fbb45 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 7 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION*