From 30b3f68715595dee7fe4d9bd91a2252c3becdf0a Mon Sep 17 00:00:00 2001 From: Caleb Connolly Date: Sun, 7 Mar 2021 15:12:22 -0800 Subject: [PATCH 001/182] Input: s6sy761 - fix coordinate read bit shift The touch coordinate register contains the following: byte 3 byte 2 byte 1 +--------+--------+ +-----------------+ +-----------------+ | | | | | | | | X[3:0] | Y[3:0] | | Y[11:4] | | X[11:4] | | | | | | | | +--------+--------+ +-----------------+ +-----------------+ Bytes 2 and 1 need to be shifted left by 4 bits, the least significant nibble of each is stored in byte 3. Currently they are only being shifted by 3 causing the reported coordinates to be incorrect. This matches downstream examples, and has been confirmed on my device (OnePlus 7 Pro). Fixes: 0145a7141e59 ("Input: add support for the Samsung S6SY761 touchscreen") Signed-off-by: Caleb Connolly Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20210305185710.225168-1-caleb@connolly.tech Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/s6sy761.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/s6sy761.c b/drivers/input/touchscreen/s6sy761.c index b63d7fdf0cd2..85a1f465c097 100644 --- a/drivers/input/touchscreen/s6sy761.c +++ b/drivers/input/touchscreen/s6sy761.c @@ -145,8 +145,8 @@ static void s6sy761_report_coordinates(struct s6sy761_data *sdata, u8 major = event[4]; u8 minor = event[5]; u8 z = event[6] & S6SY761_MASK_Z; - u16 x = (event[1] << 3) | ((event[3] & S6SY761_MASK_X) >> 4); - u16 y = (event[2] << 3) | (event[3] & S6SY761_MASK_Y); + u16 x = (event[1] << 4) | ((event[3] & S6SY761_MASK_X) >> 4); + u16 y = (event[2] << 4) | (event[3] & S6SY761_MASK_Y); input_mt_slot(sdata->input, tid); From 36b87cf302a4f13f8b4344bcf98f67405a145e2f Mon Sep 17 00:00:00 2001 From: Shou-Chieh Hsu Date: Tue, 2 Mar 2021 11:58:01 +0800 Subject: [PATCH 002/182] HID: google: add don USB id Add 1 additional hammer-like device. Signed-off-by: Shou-Chieh Hsu Signed-off-by: Jiri Kosina --- drivers/hid/hid-google-hammer.c | 2 ++ drivers/hid/hid-ids.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c index d9319622da44..e60c31dd05ff 100644 --- a/drivers/hid/hid-google-hammer.c +++ b/drivers/hid/hid-google-hammer.c @@ -573,6 +573,8 @@ static void hammer_remove(struct hid_device *hdev) } static const struct hid_device_id hammer_devices[] = { + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_DON) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_HAMMER) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index e42aaae3138f..0b4929258478 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -493,6 +493,7 @@ #define USB_DEVICE_ID_GOOGLE_MASTERBALL 0x503c #define USB_DEVICE_ID_GOOGLE_MAGNEMITE 0x503d #define USB_DEVICE_ID_GOOGLE_MOONBALL 0x5044 +#define USB_DEVICE_ID_GOOGLE_DON 0x5050 #define USB_VENDOR_ID_GOTOP 0x08f2 #define USB_DEVICE_ID_SUPER_Q2 0x007f From 9a0b44fbfea1932196a4879b44a37dd182e984c5 Mon Sep 17 00:00:00 2001 From: Luke D Jones Date: Fri, 19 Feb 2021 10:10:02 +1300 Subject: [PATCH 003/182] HID: asus: Add support for 2021 ASUS N-Key keyboard Some new 2021 version of ASUS gamer laptops are using an updated N-Key keyboard with the PID of 0x19b6. This version is using the same init sequence and brightness control as the 0x1866 keyboard. Signed-off-by: Luke D Jones Signed-off-by: Jiri Kosina --- drivers/hid/hid-asus.c | 3 +++ drivers/hid/hid-ids.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index 1dfe184ebf5a..2ab22b925941 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -1221,6 +1221,9 @@ static const struct hid_device_id asus_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD), QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD }, + { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, + USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD2), + QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD }, { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_T100TA_KEYBOARD), QUIRK_T100_KEYBOARD | QUIRK_NO_CONSUMER_USAGES }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 0b4929258478..67fd8a2f5aba 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -194,6 +194,7 @@ #define USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD2 0x1837 #define USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD3 0x1822 #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD 0x1866 +#define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD2 0x19b6 #define USB_DEVICE_ID_ASUSTEK_FX503VD_KEYBOARD 0x1869 #define USB_VENDOR_ID_ATEN 0x0557 From fa8ba6e5dc0e78e409e503ddcfceef5dd96527f4 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Thu, 4 Mar 2021 05:19:57 -0800 Subject: [PATCH 004/182] HID: alps: fix error return code in alps_input_configured() When input_register_device() fails, no error return code is assigned. To fix this bug, ret is assigned with -ENOENT as error return code. Reported-by: TOTE Robot Signed-off-by: Jia-Ju Bai Signed-off-by: Jiri Kosina --- drivers/hid/hid-alps.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-alps.c b/drivers/hid/hid-alps.c index 3feaece13ade..6b665931147d 100644 --- a/drivers/hid/hid-alps.c +++ b/drivers/hid/hid-alps.c @@ -761,6 +761,7 @@ static int alps_input_configured(struct hid_device *hdev, struct hid_input *hi) if (input_register_device(data->input2)) { input_free_device(input2); + ret = -ENOENT; goto exit; } } From 2a2b09c867fdac63f430a45051e7bd0c46edc381 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Sat, 9 Jan 2021 17:36:58 -0500 Subject: [PATCH 005/182] HID cp2112: fix support for multiple gpiochips In lk 5.11.0-rc2 connecting a USB based Silicon Labs HID to I2C bridge evaluation board (CP2112EK) causes this warning: gpio gpiochip0: (cp2112_gpio): detected irqchip that is shared with multiple gpiochips: please fix the driver Simply copy what other gpio related drivers do to fix this particular warning: replicate the struct irq_chip object in each device instance rather than have a static object which makes that object (incorrectly) shared by each device. Signed-off-by: Douglas Gilbert Signed-off-by: Jiri Kosina --- drivers/hid/hid-cp2112.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index 21e15627a461..477baa30889c 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -161,6 +161,7 @@ struct cp2112_device { atomic_t read_avail; atomic_t xfer_avail; struct gpio_chip gc; + struct irq_chip irq; u8 *in_out_buffer; struct mutex lock; @@ -1175,16 +1176,6 @@ static int cp2112_gpio_irq_type(struct irq_data *d, unsigned int type) return 0; } -static struct irq_chip cp2112_gpio_irqchip = { - .name = "cp2112-gpio", - .irq_startup = cp2112_gpio_irq_startup, - .irq_shutdown = cp2112_gpio_irq_shutdown, - .irq_ack = cp2112_gpio_irq_ack, - .irq_mask = cp2112_gpio_irq_mask, - .irq_unmask = cp2112_gpio_irq_unmask, - .irq_set_type = cp2112_gpio_irq_type, -}; - static int __maybe_unused cp2112_allocate_irq(struct cp2112_device *dev, int pin) { @@ -1339,8 +1330,17 @@ static int cp2112_probe(struct hid_device *hdev, const struct hid_device_id *id) dev->gc.can_sleep = 1; dev->gc.parent = &hdev->dev; + dev->irq.name = "cp2112-gpio"; + dev->irq.irq_startup = cp2112_gpio_irq_startup; + dev->irq.irq_shutdown = cp2112_gpio_irq_shutdown; + dev->irq.irq_ack = cp2112_gpio_irq_ack; + dev->irq.irq_mask = cp2112_gpio_irq_mask; + dev->irq.irq_unmask = cp2112_gpio_irq_unmask; + dev->irq.irq_set_type = cp2112_gpio_irq_type; + dev->irq.flags = IRQCHIP_MASK_ON_SUSPEND; + girq = &dev->gc.irq; - girq->chip = &cp2112_gpio_irqchip; + girq->chip = &dev->irq; /* The event comes from the outside so no parent handler */ girq->parent_handler = NULL; girq->num_parents = 0; From e29c62ffb008829dc8bcc0a2ec438adc25a8255e Mon Sep 17 00:00:00 2001 From: Jiapeng Zhong Date: Wed, 20 Jan 2021 15:34:30 +0800 Subject: [PATCH 006/182] HID: wacom: Assign boolean values to a bool variable Fix the following coccicheck warnings: ./drivers/hid/wacom_wac.c:2536:2-6: WARNING: Assignment of 0/1 to bool variable. Reported-by: Abaci Robot Signed-off-by: Jiapeng Zhong Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 44d715c12f6a..bdd9ba577150 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2533,7 +2533,7 @@ static void wacom_wac_finger_slot(struct wacom_wac *wacom_wac, !wacom_wac->shared->is_touch_on) { if (!wacom_wac->shared->touch_down) return; - prox = 0; + prox = false; } wacom_wac->hid_data.num_received++; From a9e54f4b62dcfed4432a5a89b1cd5903737f6e83 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 28 Jan 2021 13:12:17 +0100 Subject: [PATCH 007/182] AMD_SFH: Removed unused activecontrolstatus member from the amd_mp2_dev struct This value is only used once inside amd_mp2_get_sensor_num(), so there is no need to store this in the amd_mp2_dev struct, amd_mp2_get_sensor_num() can simple use a local variable for this. Fixes: 4f567b9f8141 ("SFH: PCIe driver to add support of AMD sensor fusion hub") Signed-off-by: Hans de Goede Acked-by: Sandeep Singh --- drivers/hid/amd-sfh-hid/amd_sfh_pcie.c | 6 ++++-- drivers/hid/amd-sfh-hid/amd_sfh_pcie.h | 1 - 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c index dbac16641662..f3cdb4ea33da 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c @@ -76,9 +76,11 @@ void amd_stop_all_sensors(struct amd_mp2_dev *privdata) int amd_mp2_get_sensor_num(struct amd_mp2_dev *privdata, u8 *sensor_id) { int activestatus, num_of_sensors = 0; + u32 activecontrolstatus; + + activecontrolstatus = readl(privdata->mmio + AMD_P2C_MSG3); + activestatus = activecontrolstatus >> 4; - privdata->activecontrolstatus = readl(privdata->mmio + AMD_P2C_MSG3); - activestatus = privdata->activecontrolstatus >> 4; if (ACEL_EN & activestatus) sensor_id[num_of_sensors++] = accel_idx; diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h index 8f8d19b2cfe5..489415f7c22c 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h @@ -61,7 +61,6 @@ struct amd_mp2_dev { struct pci_dev *pdev; struct amdtp_cl_data *cl_data; void __iomem *mmio; - u32 activecontrolstatus; }; struct amd_mp2_sensor_info { From 952f7d10c6b1685c6700fb24cf4ecbcf26ede77e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 28 Jan 2021 13:12:18 +0100 Subject: [PATCH 008/182] AMD_SFH: Add sensor_mask module parameter Add a sensor_mask module parameter which can be used to override the sensor-mask read from the activestatus bits of the AMD_P2C_MSG3 registers. Some BIOS-es do not program the activestatus bits, leading to the AMD-SFH driver not registering any HID devices even though the laptop in question does actually have sensors. While at it also fix the wrong indentation of the MAGNO_EN define. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=199715 BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1651886 Fixes: 4f567b9f8141 ("SFH: PCIe driver to add support of AMD sensor fusion hub") Suggested-by: Richard Neumann Signed-off-by: Hans de Goede Acked-by: Sandeep Singh --- drivers/hid/amd-sfh-hid/amd_sfh_pcie.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c index f3cdb4ea33da..ab0a9443e252 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c @@ -22,9 +22,13 @@ #define ACEL_EN BIT(0) #define GYRO_EN BIT(1) -#define MAGNO_EN BIT(2) +#define MAGNO_EN BIT(2) #define ALS_EN BIT(19) +static int sensor_mask_override = -1; +module_param_named(sensor_mask, sensor_mask_override, int, 0444); +MODULE_PARM_DESC(sensor_mask, "override the detected sensors mask"); + void amd_start_sensor(struct amd_mp2_dev *privdata, struct amd_mp2_sensor_info info) { union sfh_cmd_param cmd_param; @@ -78,8 +82,12 @@ int amd_mp2_get_sensor_num(struct amd_mp2_dev *privdata, u8 *sensor_id) int activestatus, num_of_sensors = 0; u32 activecontrolstatus; - activecontrolstatus = readl(privdata->mmio + AMD_P2C_MSG3); - activestatus = activecontrolstatus >> 4; + if (sensor_mask_override >= 0) { + activestatus = sensor_mask_override; + } else { + activecontrolstatus = readl(privdata->mmio + AMD_P2C_MSG3); + activestatus = activecontrolstatus >> 4; + } if (ACEL_EN & activestatus) sensor_id[num_of_sensors++] = accel_idx; From 25615e454a0ec198254f17d2ed79b607cb755d0e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 28 Jan 2021 13:12:19 +0100 Subject: [PATCH 009/182] AMD_SFH: Add DMI quirk table for BIOS-es which don't set the activestatus bits Some BIOS-es do not initialize the activestatus bits of the AMD_P2C_MSG3 register. This cause the AMD_SFH driver to not register any sensors even though the laptops in question do have sensors. Add a DMI quirk-table for specifying sensor-mask overrides based on DMI match, to make the sensors work OOTB on these laptop models. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=199715 BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1651886 Fixes: 4f567b9f8141 ("SFH: PCIe driver to add support of AMD sensor fusion hub") Signed-off-by: Hans de Goede Acked-by: Sandeep Singh --- drivers/hid/amd-sfh-hid/amd_sfh_pcie.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c index ab0a9443e252..ddecc84fd6f0 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -77,11 +78,34 @@ void amd_stop_all_sensors(struct amd_mp2_dev *privdata) writel(cmd_base.ul, privdata->mmio + AMD_C2P_MSG0); } +static const struct dmi_system_id dmi_sensor_mask_overrides[] = { + { + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "HP ENVY x360 Convertible 13-ag0xxx"), + }, + .driver_data = (void *)(ACEL_EN | MAGNO_EN), + }, + { + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "HP ENVY x360 Convertible 15-cp0xxx"), + }, + .driver_data = (void *)(ACEL_EN | MAGNO_EN), + }, + { } +}; + int amd_mp2_get_sensor_num(struct amd_mp2_dev *privdata, u8 *sensor_id) { int activestatus, num_of_sensors = 0; + const struct dmi_system_id *dmi_id; u32 activecontrolstatus; + if (sensor_mask_override == -1) { + dmi_id = dmi_first_match(dmi_sensor_mask_overrides); + if (dmi_id) + sensor_mask_override = (long)dmi_id->driver_data; + } + if (sensor_mask_override >= 0) { activestatus = sensor_mask_override; } else { From 2d8aaa1720c6128ce263a2afcd3f8ee2e5551af8 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 8 Mar 2021 11:40:30 -0800 Subject: [PATCH 010/182] Input: n64joy - fix return value check in n64joy_probe() In case of error, the function devm_platform_ioremap_resource() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Fixes: 3bdffa8ffb45 ("Input: Add N64 controller driver") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Reviewed-by: Lauri Kasanen Link: https://lore.kernel.org/r/20210308122856.2177071-1-weiyongjun1@huawei.com Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/n64joy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/joystick/n64joy.c b/drivers/input/joystick/n64joy.c index 8bcc529942bc..9dbca366613e 100644 --- a/drivers/input/joystick/n64joy.c +++ b/drivers/input/joystick/n64joy.c @@ -252,8 +252,8 @@ static int __init n64joy_probe(struct platform_device *pdev) mutex_init(&priv->n64joy_mutex); priv->reg_base = devm_platform_ioremap_resource(pdev, 0); - if (!priv->reg_base) { - err = -EINVAL; + if (IS_ERR(priv->reg_base)) { + err = PTR_ERR(priv->reg_base); goto fail; } From 2fb164f0ce95e504e2688b4f984893c29ebd19ab Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Tue, 9 Mar 2021 01:01:07 +0100 Subject: [PATCH 011/182] mtd: rawnand: mtk: Fix WAITRDY break condition and timeout This fixes NAND_OP_WAITRDY_INSTR operation in the driver. Without this change the driver waits till the system is busy, but we should wait till the busy flag is cleared. The readl_poll_timeout() function gets a break condition, not a wait condition. In addition fix the timeout. The timeout_ms is given in ms, but the readl_poll_timeout() function takes the timeout in us. Multiple the given timeout by 1000 to convert it. Without this change, the driver does not work at all, it doesn't even identify the NAND chip. Fixes: 5197360f9e09 ("mtd: rawnand: mtk: Convert the driver to exec_op()") Signed-off-by: Hauke Mehrtens Reviewed-by: Boris Brezillon Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210309000107.1368404-1-hauke@hauke-m.de --- drivers/mtd/nand/raw/mtk_nand.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c index 57f1f1708994..5c5c92132287 100644 --- a/drivers/mtd/nand/raw/mtk_nand.c +++ b/drivers/mtd/nand/raw/mtk_nand.c @@ -488,8 +488,8 @@ static int mtk_nfc_exec_instr(struct nand_chip *chip, return 0; case NAND_OP_WAITRDY_INSTR: return readl_poll_timeout(nfc->regs + NFI_STA, status, - status & STA_BUSY, 20, - instr->ctx.waitrdy.timeout_ms); + !(status & STA_BUSY), 20, + instr->ctx.waitrdy.timeout_ms * 1000); default: break; } From 0c9fdcdba68208270ae85d39600ea97da1718344 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 14 Jan 2021 19:29:28 +0800 Subject: [PATCH 012/182] soc: qcom: geni: shield geni_icc_get() for ACPI boot Currently, GENI devices like i2c-qcom-geni fails to probe in ACPI boot, if interconnect support is enabled. That's because interconnect driver only supports DT right now. As interconnect is not necessarily required for basic function of GENI devices, let's shield geni_icc_get() call, and then all other ICC calls become nop due to NULL icc_path, so that GENI devices keep working for ACPI boot. Reviewed-by: Bjorn Andersson Signed-off-by: Shawn Guo Link: https://lore.kernel.org/r/20210114112928.11368-1-shawn.guo@linaro.org Signed-off-by: Bjorn Andersson --- drivers/soc/qcom/qcom-geni-se.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/soc/qcom/qcom-geni-se.c b/drivers/soc/qcom/qcom-geni-se.c index f42954e2c98e..c7c03ccfe888 100644 --- a/drivers/soc/qcom/qcom-geni-se.c +++ b/drivers/soc/qcom/qcom-geni-se.c @@ -760,6 +760,9 @@ int geni_icc_get(struct geni_se *se, const char *icc_ddr) int i, err; const char *icc_names[] = {"qup-core", "qup-config", icc_ddr}; + if (has_acpi_companion(se->dev)) + return 0; + for (i = 0; i < ARRAY_SIZE(se->icc_paths); i++) { if (!icc_names[i]) continue; From 1cbd44666216278bbb6a55bcb6b9283702171c77 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 7 Mar 2021 06:06:28 +0200 Subject: [PATCH 013/182] dmaengine: xilinx: dpdma: Fix descriptor issuing on video group When multiple channels are part of a video group, the transfer is triggered only when all channels in the group are ready. The logic to do so is incorrect, as it causes the descriptors for all channels but the last one in a group to not being pushed to the hardware. Fix it. Signed-off-by: Laurent Pinchart Link: https://lore.kernel.org/r/20210307040629.29308-2-laurent.pinchart@ideasonboard.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dpdma.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c index 55df63dead8d..d504112c609e 100644 --- a/drivers/dma/xilinx/xilinx_dpdma.c +++ b/drivers/dma/xilinx/xilinx_dpdma.c @@ -839,6 +839,7 @@ static void xilinx_dpdma_chan_queue_transfer(struct xilinx_dpdma_chan *chan) struct xilinx_dpdma_tx_desc *desc; struct virt_dma_desc *vdesc; u32 reg, channels; + bool first_frame; lockdep_assert_held(&chan->lock); @@ -852,14 +853,6 @@ static void xilinx_dpdma_chan_queue_transfer(struct xilinx_dpdma_chan *chan) chan->running = true; } - if (chan->video_group) - channels = xilinx_dpdma_chan_video_group_ready(chan); - else - channels = BIT(chan->id); - - if (!channels) - return; - vdesc = vchan_next_desc(&chan->vchan); if (!vdesc) return; @@ -884,13 +877,26 @@ static void xilinx_dpdma_chan_queue_transfer(struct xilinx_dpdma_chan *chan) FIELD_PREP(XILINX_DPDMA_CH_DESC_START_ADDRE_MASK, upper_32_bits(sw_desc->dma_addr))); - if (chan->first_frame) + first_frame = chan->first_frame; + chan->first_frame = false; + + if (chan->video_group) { + channels = xilinx_dpdma_chan_video_group_ready(chan); + /* + * Trigger the transfer only when all channels in the group are + * ready. + */ + if (!channels) + return; + } else { + channels = BIT(chan->id); + } + + if (first_frame) reg = XILINX_DPDMA_GBL_TRIG_MASK(channels); else reg = XILINX_DPDMA_GBL_RETRIG_MASK(channels); - chan->first_frame = false; - dpdma_write(xdev->reg, XILINX_DPDMA_GBL, reg); } From 868833fbffbe51c487df4f95d4de9194264a4b30 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 7 Mar 2021 06:06:29 +0200 Subject: [PATCH 014/182] dmaengine: xilinx: dpdma: Fix race condition in done IRQ The active descriptor pointer is accessed from different contexts, including different interrupt handlers, and its access must be protected by the channel's lock. This wasn't done in the done IRQ handler. Fix it. Signed-off-by: Laurent Pinchart Link: https://lore.kernel.org/r/20210307040629.29308-3-laurent.pinchart@ideasonboard.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dpdma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c index d504112c609e..70b29bd079c9 100644 --- a/drivers/dma/xilinx/xilinx_dpdma.c +++ b/drivers/dma/xilinx/xilinx_dpdma.c @@ -1048,13 +1048,14 @@ static int xilinx_dpdma_chan_stop(struct xilinx_dpdma_chan *chan) */ static void xilinx_dpdma_chan_done_irq(struct xilinx_dpdma_chan *chan) { - struct xilinx_dpdma_tx_desc *active = chan->desc.active; + struct xilinx_dpdma_tx_desc *active; unsigned long flags; spin_lock_irqsave(&chan->lock, flags); xilinx_dpdma_debugfs_desc_done_irq(chan); + active = chan->desc.active; if (active) vchan_cyclic_callback(&active->vdesc); else From 276559d8d02c2709281578976ca2f53bc62063d4 Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Thu, 11 Mar 2021 11:30:09 -0800 Subject: [PATCH 015/182] HID: wacom: set EV_KEY and EV_ABS only for non-HID_GENERIC type of devices Valid HID_GENERIC type of devices set EV_KEY and EV_ABS by wacom_map_usage. When *_input_capabilities are reached, those devices should already have their proper EV_* set. EV_KEY and EV_ABS only need to be set for non-HID_GENERIC type of devices in *_input_capabilities. Devices that don't support HID descitoprs will pass back to hid-input for registration without being accidentally rejected by the introduction of patch: "Input: refuse to register absolute devices without absinfo" Fixes: 6ecfe51b4082 ("Input: refuse to register absolute devices without absinfo") Signed-off-by: Ping Cheng Reviewed-by: Jason Gerecke Tested-by: Juan Garrido CC: stable@vger.kernel.org Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index bdd9ba577150..2d70dc4bea65 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -3574,8 +3574,6 @@ int wacom_setup_pen_input_capabilities(struct input_dev *input_dev, { struct wacom_features *features = &wacom_wac->features; - input_dev->evbit[0] |= BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); - if (!(features->device_type & WACOM_DEVICETYPE_PEN)) return -ENODEV; @@ -3590,6 +3588,7 @@ int wacom_setup_pen_input_capabilities(struct input_dev *input_dev, return 0; } + input_dev->evbit[0] |= BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); __set_bit(BTN_TOUCH, input_dev->keybit); __set_bit(ABS_MISC, input_dev->absbit); @@ -3742,8 +3741,6 @@ int wacom_setup_touch_input_capabilities(struct input_dev *input_dev, { struct wacom_features *features = &wacom_wac->features; - input_dev->evbit[0] |= BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); - if (!(features->device_type & WACOM_DEVICETYPE_TOUCH)) return -ENODEV; @@ -3756,6 +3753,7 @@ int wacom_setup_touch_input_capabilities(struct input_dev *input_dev, /* setup has already been done */ return 0; + input_dev->evbit[0] |= BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); __set_bit(BTN_TOUCH, input_dev->keybit); if (features->touch_max == 1) { From 9de82caad0282205d4c38a39456bce58e3219540 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 22 Mar 2021 12:44:58 +0100 Subject: [PATCH 016/182] dax: avoid -Wempty-body warnings gcc warns about an empty body in an else statement: drivers/dax/bus.c: In function 'do_id_store': drivers/dax/bus.c:94:48: error: suggest braces around empty body in an 'else' statement [-Werror=empty-body] 94 | /* nothing to remove */; | ^ drivers/dax/bus.c:99:43: error: suggest braces around empty body in an 'else' statement [-Werror=empty-body] 99 | /* dax_id already added */; | ^ In both of these cases, the 'else' exists only to have a place to add a comment, but that comment doesn't really explain that much either, so the easiest way to shut up that warning is to just remove the else. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20210322114514.3490752-1-arnd@kernel.org Signed-off-by: Dan Williams --- drivers/dax/bus.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 452e85ae87a8..5aee26e1bbd6 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -90,13 +90,11 @@ static ssize_t do_id_store(struct device_driver *drv, const char *buf, list_add(&dax_id->list, &dax_drv->ids); } else rc = -ENOMEM; - } else - /* nothing to remove */; + } } else if (action == ID_REMOVE) { list_del(&dax_id->list); kfree(dax_id); - } else - /* dax_id already added */; + } mutex_unlock(&dax_bus_lock); if (rc < 0) From daa58c8eec0a65ac8e2e77ff3ea8a233d8eec954 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 23 Mar 2021 09:56:34 -0700 Subject: [PATCH 017/182] Input: i8042 - fix Pegatron C15B ID entry The Zenbook Flip entry that was added overwrites a previous one because of a typo: In file included from drivers/input/serio/i8042.h:23, from drivers/input/serio/i8042.c:131: drivers/input/serio/i8042-x86ia64io.h:591:28: error: initialized field overwritten [-Werror=override-init] 591 | .matches = { | ^ drivers/input/serio/i8042-x86ia64io.h:591:28: note: (near initialization for 'i8042_dmi_noselftest_table[0].matches') Add the missing separator between the two. Fixes: b5d6e7ab7fe7 ("Input: i8042 - add ASUS Zenbook Flip to noselftest list") Signed-off-by: Arnd Bergmann Reviewed-by: Hans de Goede Reviewed-by: Marcos Paulo de Souza Link: https://lore.kernel.org/r/20210323130623.2302402-1-arnd@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-x86ia64io.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 9119e12a5778..a5a003553646 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -588,6 +588,7 @@ static const struct dmi_system_id i8042_dmi_noselftest_table[] = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_CHASSIS_TYPE, "10"), /* Notebook */ }, + }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_CHASSIS_TYPE, "31"), /* Convertible Notebook */ From 69d5ff3e9e51e23d5d81bf48480aa5671be67a71 Mon Sep 17 00:00:00 2001 From: Fabian Vogt Date: Tue, 23 Mar 2021 10:45:55 -0700 Subject: [PATCH 018/182] Input: nspire-keypad - enable interrupts only when opened The driver registers an interrupt handler in _probe, but didn't configure them until later when the _open function is called. In between, the keypad can fire an IRQ due to touchpad activity, which the handler ignores. This causes the kernel to disable the interrupt, blocking the keypad from working. Fix this by disabling interrupts before registering the handler. Additionally, disable them in _close, so that they're only enabled while open. Fixes: fc4f31461892 ("Input: add TI-Nspire keypad support") Signed-off-by: Fabian Vogt Link: https://lore.kernel.org/r/3383725.iizBOSrK1V@linux-e202.suse.de Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/nspire-keypad.c | 56 ++++++++++++++------------ 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/drivers/input/keyboard/nspire-keypad.c b/drivers/input/keyboard/nspire-keypad.c index 63d5e488137d..e9fa1423f136 100644 --- a/drivers/input/keyboard/nspire-keypad.c +++ b/drivers/input/keyboard/nspire-keypad.c @@ -93,9 +93,15 @@ static irqreturn_t nspire_keypad_irq(int irq, void *dev_id) return IRQ_HANDLED; } -static int nspire_keypad_chip_init(struct nspire_keypad *keypad) +static int nspire_keypad_open(struct input_dev *input) { + struct nspire_keypad *keypad = input_get_drvdata(input); unsigned long val = 0, cycles_per_us, delay_cycles, row_delay_cycles; + int error; + + error = clk_prepare_enable(keypad->clk); + if (error) + return error; cycles_per_us = (clk_get_rate(keypad->clk) / 1000000); if (cycles_per_us == 0) @@ -121,30 +127,6 @@ static int nspire_keypad_chip_init(struct nspire_keypad *keypad) keypad->int_mask = 1 << 1; writel(keypad->int_mask, keypad->reg_base + KEYPAD_INTMSK); - /* Disable GPIO interrupts to prevent hanging on touchpad */ - /* Possibly used to detect touchpad events */ - writel(0, keypad->reg_base + KEYPAD_UNKNOWN_INT); - /* Acknowledge existing interrupts */ - writel(~0, keypad->reg_base + KEYPAD_UNKNOWN_INT_STS); - - return 0; -} - -static int nspire_keypad_open(struct input_dev *input) -{ - struct nspire_keypad *keypad = input_get_drvdata(input); - int error; - - error = clk_prepare_enable(keypad->clk); - if (error) - return error; - - error = nspire_keypad_chip_init(keypad); - if (error) { - clk_disable_unprepare(keypad->clk); - return error; - } - return 0; } @@ -152,6 +134,11 @@ static void nspire_keypad_close(struct input_dev *input) { struct nspire_keypad *keypad = input_get_drvdata(input); + /* Disable interrupts */ + writel(0, keypad->reg_base + KEYPAD_INTMSK); + /* Acknowledge existing interrupts */ + writel(~0, keypad->reg_base + KEYPAD_INT); + clk_disable_unprepare(keypad->clk); } @@ -210,6 +197,25 @@ static int nspire_keypad_probe(struct platform_device *pdev) return -ENOMEM; } + error = clk_prepare_enable(keypad->clk); + if (error) { + dev_err(&pdev->dev, "failed to enable clock\n"); + return error; + } + + /* Disable interrupts */ + writel(0, keypad->reg_base + KEYPAD_INTMSK); + /* Acknowledge existing interrupts */ + writel(~0, keypad->reg_base + KEYPAD_INT); + + /* Disable GPIO interrupts to prevent hanging on touchpad */ + /* Possibly used to detect touchpad events */ + writel(0, keypad->reg_base + KEYPAD_UNKNOWN_INT); + /* Acknowledge existing GPIO interrupts */ + writel(~0, keypad->reg_base + KEYPAD_UNKNOWN_INT_STS); + + clk_disable_unprepare(keypad->clk); + input_set_drvdata(input, keypad); input->id.bustype = BUS_HOST; From 4b154b941f0ed49f901ac2f96e92ee07ff81d8d8 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 12 Mar 2021 13:58:08 +0100 Subject: [PATCH 019/182] arm64: tegra: Add unit-address for ACONNECT on Tegra186 The ACONNECT device tree node has a unit-address on all other SoC generations and there's really no reason not to have it on Tegra186. Reviewed-by: Jon Hunter Signed-off-by: Thierry Reding --- arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts | 2 +- arch/arm64/boot/dts/nvidia/tegra186.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts index 9f5f5e1fa82e..683743f81849 100644 --- a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts +++ b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts @@ -10,7 +10,7 @@ / { model = "NVIDIA Jetson TX2 Developer Kit"; compatible = "nvidia,p2771-0000", "nvidia,tegra186"; - aconnect { + aconnect@2900000 { status = "okay"; dma-controller@2930000 { diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi b/arch/arm64/boot/dts/nvidia/tegra186.dtsi index 02b26b39cedc..9f75bbf00cf7 100644 --- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi @@ -73,7 +73,7 @@ ethernet@2490000 { snps,rxpbl = <8>; }; - aconnect { + aconnect@2900000 { compatible = "nvidia,tegra186-aconnect", "nvidia,tegra210-aconnect"; clocks = <&bpmp TEGRA186_CLK_APE>, From 75c82a25b59576c4b5a3fd0073744af865b1571f Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Fri, 12 Mar 2021 10:15:51 +0000 Subject: [PATCH 020/182] arm64: tegra: Set fw_devlink=on for Jetson TX2 Commit 5d25c476f252 ("Revert "arm64: tegra: Disable the ACONNECT for Jetson TX2"") re-enabled the Tegra ADMA and ACONNECT drivers to support audio on Jetson TX2. However, this revert was dependent upon commit e590474768f1 ("driver core: Set fw_devlink=on by default") and without this commit, enabling the ACONNECT is causing resume from system suspend to fail on Jetson TX2. Resume fails because the ACONNECT driver is being resumed before the BPMP driver, and the ACONNECT driver is attempting to power on a power-domain that is provided by the BPMP. Commit e590474768f1 ("driver core: Set fw_devlink=on by default") has since been temporarily reverted while some issues are being investigated. This is causing resume from system suspend on Jetson TX2 to fail again. Rather than disable the ACONNECT driver again, fix this by setting fw_devlink is set to 'on' for Jetson TX2 in the bootargs specified in device-tree. Fixes: 5d25c476f252 ("Revert arm64: tegra: Disable the ACONNECT for Jetson TX2") Signed-off-by: Jon Hunter Signed-off-by: Thierry Reding --- arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi b/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi index fd9177447711..fcd71bfc6707 100644 --- a/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi @@ -23,7 +23,7 @@ aliases { }; chosen { - bootargs = "earlycon console=ttyS0,115200n8"; + bootargs = "earlycon console=ttyS0,115200n8 fw_devlink=on"; stdout-path = "serial0:115200n8"; }; From bb05e11fff09d341047ce1984aa0d975ed8613e7 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Fri, 12 Mar 2021 09:42:35 +0000 Subject: [PATCH 021/182] arm64: tegra: Fix mmc0 alias for Jetson Xavier NX There are two variants of the Jetson Xavier NX platform; one has an eMMC and one as a micro SD-card slot. The SDHCI controller used by each variant is different, however, the current device-tree for both Xavier NX boards have the same SDHCI controller defined as 'mmc0' in the device-tree alias node. Fix this by correcting the 'mmc0' alias for the SD-card variant. Fixes: 3f9efbbe57bc ("arm64: tegra: Add support for Jetson Xavier NX") Signed-off-by: Jon Hunter Signed-off-by: Thierry Reding --- arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi | 4 ++++ arch/arm64/boot/dts/nvidia/tegra194-p3668-0001.dtsi | 4 ++++ arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi | 1 - 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi index 7da3d48cb410..14da4206ea66 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi @@ -5,6 +5,10 @@ / { model = "NVIDIA Jetson Xavier NX (SD-card)"; compatible = "nvidia,p3668-0000", "nvidia,tegra194"; + aliases { + mmc0 = "/bus@0/mmc@3400000"; + }; + bus@0 { /* SDMMC1 (SD/MMC) */ mmc@3400000 { diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p3668-0001.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p3668-0001.dtsi index b7808648cfe4..f5a9ebbfb12f 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194-p3668-0001.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194-p3668-0001.dtsi @@ -5,6 +5,10 @@ / { model = "NVIDIA Jetson Xavier NX (eMMC)"; compatible = "nvidia,p3668-0001", "nvidia,tegra194"; + aliases { + mmc0 = "/bus@0/mmc@3460000"; + }; + bus@0 { /* SDMMC4 (eMMC) */ mmc@3460000 { diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi index 4f12721c332b..f16b0aa8a374 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi @@ -14,7 +14,6 @@ aliases { i2c5 = "/bus@0/i2c@31c0000"; i2c6 = "/bus@0/i2c@c250000"; i2c7 = "/bus@0/i2c@31e0000"; - mmc0 = "/bus@0/mmc@3460000"; rtc0 = "/bpmp/i2c/pmic@3c"; rtc1 = "/bus@0/rtc@c2a0000"; serial0 = &tcu; From a3efe3f6d0eb64363f74af4b0e8ba6d19415cef2 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 24 Mar 2021 12:23:58 +0200 Subject: [PATCH 022/182] ARM: OMAP2+: Fix warning for omap_init_time_of() Fix warning: no previous prototype for 'omap_init_time_of'. Fixes: e69b4e1a7577 ("ARM: OMAP2+: Add omap_init_time_of()") Reported-by: kernel test robot Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/board-generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c index 7290f033fd2d..1610c567a6a3 100644 --- a/arch/arm/mach-omap2/board-generic.c +++ b/arch/arm/mach-omap2/board-generic.c @@ -33,7 +33,7 @@ static void __init __maybe_unused omap_generic_init(void) } /* Clocks are needed early, see drivers/clocksource for the rest */ -void __init __maybe_unused omap_init_time_of(void) +static void __init __maybe_unused omap_init_time_of(void) { omap_clk_init(); timer_probe(); From a1ebdb3741993f853865d1bd8f77881916ad53a7 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 24 Mar 2021 15:10:32 +0200 Subject: [PATCH 023/182] ARM: dts: Fix swapped mmc order for omap3 Also some omap3 devices like n900 seem to have eMMC and micro-sd swapped around with commit 21b2cec61c04 ("mmc: Set PROBE_PREFER_ASYNCHRONOUS for drivers that existed in v4.4"). Let's fix the issue with aliases as discussed on the mailing lists. While the mmc aliases should be board specific, let's first fix the issue with minimal changes. Cc: Aaro Koskinen Cc: Peter Ujfalusi Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi index 9dcae1f2bc99..c5b9da0d7e6c 100644 --- a/arch/arm/boot/dts/omap3.dtsi +++ b/arch/arm/boot/dts/omap3.dtsi @@ -24,6 +24,9 @@ aliases { i2c0 = &i2c1; i2c1 = &i2c2; i2c2 = &i2c3; + mmc0 = &mmc1; + mmc1 = &mmc2; + mmc2 = &mmc3; serial0 = &uart1; serial1 = &uart2; serial2 = &uart3; From d624833f5984d484c5e3196f34b926f9e71dafee Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 17 Feb 2021 20:26:23 +0100 Subject: [PATCH 024/182] ARM: 9063/1: mm: reduce maximum number of CPUs if DEBUG_KMAP_LOCAL is enabled The debugging code for kmap_local() doubles the number of per-CPU fixmap slots allocated for kmap_local(), in order to use half of them as guard regions. This causes the fixmap region to grow downwards beyond the start of its reserved window if the supported number of CPUs is large, and collide with the newly added virtual DT mapping right below it, which is obviously not good. One manifestation of this is EFI boot on a kernel built with NR_CPUS=32 and CONFIG_DEBUG_KMAP_LOCAL=y, which may pass the FDT in highmem, resulting in block entries below the fixmap region that the fixmap code misidentifies as fixmap table entries, and subsequently tries to dereference using a phys-to-virt translation that is only valid for lowmem. This results in a cryptic splat such as the one below. ftrace: allocating 45548 entries in 89 pages 8<--- cut here --- Unable to handle kernel paging request at virtual address fc6006f0 pgd = (ptrval) [fc6006f0] *pgd=80000040207003, *pmd=00000000 Internal error: Oops: a06 [#1] SMP ARM Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 5.11.0+ #382 Hardware name: Generic DT based system PC is at cpu_ca15_set_pte_ext+0x24/0x30 LR is at __set_fixmap+0xe4/0x118 pc : [] lr : [] psr: 400000d3 sp : c1601ed8 ip : 00400000 fp : 00800000 r10: 0000071f r9 : 00421000 r8 : 00c00000 r7 : 00c00000 r6 : 0000071f r5 : ffade000 r4 : 4040171f r3 : 00c00000 r2 : 4040171f r1 : c041ac78 r0 : fc6006f0 Flags: nZcv IRQs off FIQs off Mode SVC_32 ISA ARM Segment none Control: 30c5387d Table: 40203000 DAC: 00000001 Process swapper (pid: 0, stack limit = 0x(ptrval)) So let's limit CONFIG_NR_CPUS to 16 when CONFIG_DEBUG_KMAP_LOCAL=y. Also, fix the BUILD_BUG_ON() check that was supposed to catch this, by checking whether the region grows below the start address rather than above the end address. Fixes: 2a15ba82fa6ca3f3 ("ARM: highmem: Switch to generic kmap atomic") Reported-by: Peter Robinson Tested-by: Peter Robinson Signed-off-by: Ard Biesheuvel Acked-by: Thomas Gleixner Reviewed-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/Kconfig | 8 +++++++- arch/arm/mm/mmu.c | 3 +-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 853aab5ab327..becc6d684051 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1292,9 +1292,15 @@ config KASAN_SHADOW_OFFSET config NR_CPUS int "Maximum number of CPUs (2-32)" - range 2 32 + range 2 16 if DEBUG_KMAP_LOCAL + range 2 32 if !DEBUG_KMAP_LOCAL depends on SMP default "4" + help + The maximum number of CPUs that the kernel can support. + Up to 32 CPUs can be supported, or up to 16 if kmap_local() + debugging is enabled, which uses half of the per-CPU fixmap + slots as guard regions. config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index a25b660c3017..c1e12aab67b8 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -387,8 +387,7 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) pte_t *pte = pte_offset_fixmap(pmd_off_k(vaddr), vaddr); /* Make sure fixmap region does not exceed available allocation. */ - BUILD_BUG_ON(FIXADDR_START + (__end_of_fixed_addresses * PAGE_SIZE) > - FIXADDR_END); + BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) < FIXADDR_START); BUG_ON(idx >= __end_of_fixed_addresses); /* we only support device mappings until pgprot_kernel has been set */ From 45c2f70cba3a7eff34574103b2e2b901a5f771aa Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Thu, 11 Mar 2021 13:32:16 +0100 Subject: [PATCH 025/182] ARM: 9069/1: NOMMU: Fix conversion for_each_membock() to for_each_mem_range() for_each_mem_range() uses a loop variable, yet looking into code it is not just iteration counter but more complex entity which encodes information about memblock. Thus condition i == 0 looks fragile. Indeed, it broke boot of R-class platforms since it never took i == 0 path (due to i was set to 1). Fix that with restoring original flag check. Fixes: b10d6bca8720 ("arch, drivers: replace for_each_membock() with for_each_mem_range()") Signed-off-by: Vladimir Murzin Acked-by: Mike Rapoport Signed-off-by: Russell King --- arch/arm/mm/pmsa-v7.c | 4 +++- arch/arm/mm/pmsa-v8.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm/mm/pmsa-v7.c b/arch/arm/mm/pmsa-v7.c index 88950e41a3a9..59d916ccdf25 100644 --- a/arch/arm/mm/pmsa-v7.c +++ b/arch/arm/mm/pmsa-v7.c @@ -235,6 +235,7 @@ void __init pmsav7_adjust_lowmem_bounds(void) phys_addr_t mem_end; phys_addr_t reg_start, reg_end; unsigned int mem_max_regions; + bool first = true; int num; u64 i; @@ -263,7 +264,7 @@ void __init pmsav7_adjust_lowmem_bounds(void) #endif for_each_mem_range(i, ®_start, ®_end) { - if (i == 0) { + if (first) { phys_addr_t phys_offset = PHYS_OFFSET; /* @@ -275,6 +276,7 @@ void __init pmsav7_adjust_lowmem_bounds(void) mem_start = reg_start; mem_end = reg_end; specified_mem_size = mem_end - mem_start; + first = false; } else { /* * memblock auto merges contiguous blocks, remove diff --git a/arch/arm/mm/pmsa-v8.c b/arch/arm/mm/pmsa-v8.c index 2de019f7503e..8359748a19a1 100644 --- a/arch/arm/mm/pmsa-v8.c +++ b/arch/arm/mm/pmsa-v8.c @@ -95,10 +95,11 @@ void __init pmsav8_adjust_lowmem_bounds(void) { phys_addr_t mem_end; phys_addr_t reg_start, reg_end; + bool first = true; u64 i; for_each_mem_range(i, ®_start, ®_end) { - if (i == 0) { + if (first) { phys_addr_t phys_offset = PHYS_OFFSET; /* @@ -107,6 +108,7 @@ void __init pmsav8_adjust_lowmem_bounds(void) if (reg_start != phys_offset) panic("First memory bank must be contiguous from PHYS_OFFSET"); mem_end = reg_end; + first = false; } else { /* * memblock auto merges contiguous blocks, remove From 30e3b4f256b4e366a61658c294f6a21b8626dda7 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 25 Mar 2021 10:26:21 +0000 Subject: [PATCH 026/182] ARM: footbridge: fix PCI interrupt mapping Since commit 30fdfb929e82 ("PCI: Add a call to pci_assign_irq() in pci_device_probe()"), the PCI code will call the IRQ mapping function whenever a PCI driver is probed. If these are marked as __init, this causes an oops if a PCI driver is loaded or bound after the kernel has initialised. Fixes: 30fdfb929e82 ("PCI: Add a call to pci_assign_irq() in pci_device_probe()") Signed-off-by: Russell King --- arch/arm/mach-footbridge/cats-pci.c | 4 ++-- arch/arm/mach-footbridge/ebsa285-pci.c | 4 ++-- arch/arm/mach-footbridge/netwinder-pci.c | 2 +- arch/arm/mach-footbridge/personal-pci.c | 5 ++--- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/arm/mach-footbridge/cats-pci.c b/arch/arm/mach-footbridge/cats-pci.c index 0b2fd7e2e9b4..90b1e9be430e 100644 --- a/arch/arm/mach-footbridge/cats-pci.c +++ b/arch/arm/mach-footbridge/cats-pci.c @@ -15,14 +15,14 @@ #include /* cats host-specific stuff */ -static int irqmap_cats[] __initdata = { IRQ_PCI, IRQ_IN0, IRQ_IN1, IRQ_IN3 }; +static int irqmap_cats[] = { IRQ_PCI, IRQ_IN0, IRQ_IN1, IRQ_IN3 }; static u8 cats_no_swizzle(struct pci_dev *dev, u8 *pin) { return 0; } -static int __init cats_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +static int cats_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { if (dev->irq >= 255) return -1; /* not a valid interrupt. */ diff --git a/arch/arm/mach-footbridge/ebsa285-pci.c b/arch/arm/mach-footbridge/ebsa285-pci.c index 6f28aaa9ca79..c3f280d08fa7 100644 --- a/arch/arm/mach-footbridge/ebsa285-pci.c +++ b/arch/arm/mach-footbridge/ebsa285-pci.c @@ -14,9 +14,9 @@ #include #include -static int irqmap_ebsa285[] __initdata = { IRQ_IN3, IRQ_IN1, IRQ_IN0, IRQ_PCI }; +static int irqmap_ebsa285[] = { IRQ_IN3, IRQ_IN1, IRQ_IN0, IRQ_PCI }; -static int __init ebsa285_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +static int ebsa285_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { if (dev->vendor == PCI_VENDOR_ID_CONTAQ && dev->device == PCI_DEVICE_ID_CONTAQ_82C693) diff --git a/arch/arm/mach-footbridge/netwinder-pci.c b/arch/arm/mach-footbridge/netwinder-pci.c index 9473aa0305e5..e8304392074b 100644 --- a/arch/arm/mach-footbridge/netwinder-pci.c +++ b/arch/arm/mach-footbridge/netwinder-pci.c @@ -18,7 +18,7 @@ * We now use the slot ID instead of the device identifiers to select * which interrupt is routed where. */ -static int __init netwinder_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +static int netwinder_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { switch (slot) { case 0: /* host bridge */ diff --git a/arch/arm/mach-footbridge/personal-pci.c b/arch/arm/mach-footbridge/personal-pci.c index 4391e433a4b2..9d19aa98a663 100644 --- a/arch/arm/mach-footbridge/personal-pci.c +++ b/arch/arm/mach-footbridge/personal-pci.c @@ -14,13 +14,12 @@ #include #include -static int irqmap_personal_server[] __initdata = { +static int irqmap_personal_server[] = { IRQ_IN0, IRQ_IN1, IRQ_IN2, IRQ_IN3, 0, 0, 0, IRQ_DOORBELLHOST, IRQ_DMA1, IRQ_DMA2, IRQ_PCI }; -static int __init personal_server_map_irq(const struct pci_dev *dev, u8 slot, - u8 pin) +static int personal_server_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { unsigned char line; From 405fa9e9d8664e830982c5fbcb70f9ba8656bafc Mon Sep 17 00:00:00 2001 From: Sameer Pujar Date: Mon, 15 Mar 2021 23:10:15 +0530 Subject: [PATCH 027/182] arm64: tegra: Move clocks from RT5658 endpoint to device node An endpoint is not a device and it is recommended to use clocks property in device node. RT5658 Codec binding already specifies the usage of clocks property. Thus move the clocks from endpoint to device node. Fixes: 5b4f6323096a ("arm64: tegra: Audio graph sound card for Jetson AGX Xavier") Suggested-by: Rob Herring Signed-off-by: Sameer Pujar Signed-off-by: Thierry Reding --- arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts b/arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts index 2888efc42ba1..d618f197a1d3 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts +++ b/arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts @@ -651,6 +651,8 @@ rt5658: audio-codec@1a { reg = <0x1a>; interrupt-parent = <&gpio>; interrupts = ; + clocks = <&bpmp TEGRA194_CLK_AUD_MCLK>; + clock-names = "mclk"; realtek,jd-src = <2>; sound-name-prefix = "CVB-RT"; @@ -658,7 +660,6 @@ port { rt5658_ep: endpoint { remote-endpoint = <&i2s1_dap_ep>; mclk-fs = <256>; - clocks = <&bpmp TEGRA194_CLK_AUD_MCLK>; }; }; }; From 781bab3238c21c8cc6d1999a6ee43de76252fdfd Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 26 Mar 2021 15:19:27 -0700 Subject: [PATCH 028/182] Input: elants_i2c - fix division by zero if firmware reports zero phys size Touchscreen firmware of ASUS Transformer TF700T reports zeros for the phys size. Hence check whether the size is zero and don't set the resolution in this case. Reported-by: Jasper Korten Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20210302100824.3423-1-digetx@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/elants_i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c index 4c2b579f6c8b..78172c31529a 100644 --- a/drivers/input/touchscreen/elants_i2c.c +++ b/drivers/input/touchscreen/elants_i2c.c @@ -1441,7 +1441,7 @@ static int elants_i2c_probe(struct i2c_client *client, touchscreen_parse_properties(ts->input, true, &ts->prop); - if (ts->chip_id == EKTF3624) { + if (ts->chip_id == EKTF3624 && ts->phy_x && ts->phy_y) { /* calculate resolution from size */ ts->x_res = DIV_ROUND_CLOSEST(ts->prop.max_x, ts->phy_x); ts->y_res = DIV_ROUND_CLOSEST(ts->prop.max_y, ts->phy_y); From 56cfe6f820a6315291eb5a1b82bb49633b993d3b Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 28 Mar 2021 22:57:48 -0700 Subject: [PATCH 029/182] Input: elants_i2c - drop zero-checking of ABS_MT_TOUCH_MAJOR resolution Drop unnecessary zero-checking of ABS_MT_TOUCH_MAJOR resolution since there is no difference between setting resolution to 0 vs not setting it at all. This change makes code cleaner a tad. Suggested-by: Dmitry Torokhov Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20210328235507.19240-1-digetx@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/elants_i2c.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c index 78172c31529a..5f7706febcb0 100644 --- a/drivers/input/touchscreen/elants_i2c.c +++ b/drivers/input/touchscreen/elants_i2c.c @@ -1449,8 +1449,7 @@ static int elants_i2c_probe(struct i2c_client *client, input_abs_set_res(ts->input, ABS_MT_POSITION_X, ts->x_res); input_abs_set_res(ts->input, ABS_MT_POSITION_Y, ts->y_res); - if (ts->major_res > 0) - input_abs_set_res(ts->input, ABS_MT_TOUCH_MAJOR, ts->major_res); + input_abs_set_res(ts->input, ABS_MT_TOUCH_MAJOR, ts->major_res); error = input_mt_init_slots(ts->input, MAX_CONTACT_NUM, INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED); From 0e07e25b481aa021e4b48085ecb8a049e9614510 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 30 Mar 2021 16:24:11 +0200 Subject: [PATCH 030/182] netfilter: flowtable: fix NAT IPv6 offload mangling Fix out-of-bound access in the address array. Fixes: 5c27d8d76ce8 ("netfilter: nf_flow_table_offload: add IPv6 support") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_offload.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 2a6993fa40d7..1c5460e7bce8 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -305,12 +305,12 @@ static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule, const __be32 *addr, const __be32 *mask) { struct flow_action_entry *entry; - int i; + int i, j; - for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32)) { + for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) { entry = flow_action_entry_next(flow_rule); flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6, - offset + i, &addr[i], mask); + offset + i, &addr[j], mask); } } From fbea31808ca124dd73ff6bb1e67c9af4607c3e32 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 31 Mar 2021 01:04:45 +0200 Subject: [PATCH 031/182] netfilter: conntrack: do not print icmpv6 as unknown via /proc /proc/net/nf_conntrack shows icmpv6 as unknown. Fixes: 09ec82f5af99 ("netfilter: conntrack: remove protocol name from l4proto struct") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_standalone.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 0ee702d374b0..c6c0cb465664 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -266,6 +266,7 @@ static const char* l4proto_name(u16 proto) case IPPROTO_GRE: return "gre"; case IPPROTO_SCTP: return "sctp"; case IPPROTO_UDPLITE: return "udplite"; + case IPPROTO_ICMPV6: return "icmpv6"; } return "unknown"; From fc85dc42a38405099f97aa2af709fe9504a82508 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 31 Mar 2021 09:27:41 +0300 Subject: [PATCH 032/182] ARM: OMAP2+: Fix uninitialized sr_inst Fix uninitialized sr_inst. Fixes: fbfa463be8dc ("ARM: OMAP2+: Fix smartreflex init regression after dropping legacy data") Reported-by: kernel test robot Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/sr_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/sr_device.c b/arch/arm/mach-omap2/sr_device.c index 17b66f0d0dee..605925684b0a 100644 --- a/arch/arm/mach-omap2/sr_device.c +++ b/arch/arm/mach-omap2/sr_device.c @@ -188,7 +188,7 @@ static const char * const dra7_sr_instances[] = { int __init omap_devinit_smartreflex(void) { - const char * const *sr_inst; + const char * const *sr_inst = NULL; int i, nr_sr = 0; if (soc_is_omap44xx()) { From 23cf00ddd2e1aacf1873e43f5e0c519c120daf7a Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 29 Mar 2021 14:41:12 +0300 Subject: [PATCH 033/182] gpio: sysfs: Obey valid_mask Do not allow exporting GPIOs which are set invalid by the driver's valid mask. Fixes: 726cb3ba4969 ("gpiolib: Support 'gpio-reserved-ranges' property") Signed-off-by: Matti Vaittinen Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-sysfs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpio/gpiolib-sysfs.c b/drivers/gpio/gpiolib-sysfs.c index 26c5466b8179..ae49bb23c6ed 100644 --- a/drivers/gpio/gpiolib-sysfs.c +++ b/drivers/gpio/gpiolib-sysfs.c @@ -458,6 +458,8 @@ static ssize_t export_store(struct class *class, long gpio; struct gpio_desc *desc; int status; + struct gpio_chip *gc; + int offset; status = kstrtol(buf, 0, &gpio); if (status < 0) @@ -469,6 +471,12 @@ static ssize_t export_store(struct class *class, pr_warn("%s: invalid GPIO %ld\n", __func__, gpio); return -EINVAL; } + gc = desc->gdev->chip; + offset = gpio_chip_hwgpio(desc); + if (!gpiochip_line_is_valid(gc, offset)) { + pr_warn("%s: GPIO %ld masked\n", __func__, gpio); + return -EINVAL; + } /* No extra locking here; FLAG_SYSFS just signifies that the * request and export were done by on behalf of userspace, so From 185f2e5f51c2029efd9dd26cceb968a44fe053c6 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 1 Apr 2021 09:51:10 -0700 Subject: [PATCH 034/182] arm64: fix inline asm in load_unaligned_zeropad() The inline asm's addr operand is marked as input-only, however in the case where an exception is taken it may be modified by the BIC instruction on the exception path. Fix the problem by using a temporary register as the destination register for the BIC instruction. Signed-off-by: Peter Collingbourne Cc: stable@vger.kernel.org Link: https://linux-review.googlesource.com/id/I84538c8a2307d567b4f45bb20b715451005f9617 Link: https://lore.kernel.org/r/20210401165110.3952103-1-pcc@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/word-at-a-time.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h index 3333950b5909..ea487218db79 100644 --- a/arch/arm64/include/asm/word-at-a-time.h +++ b/arch/arm64/include/asm/word-at-a-time.h @@ -53,7 +53,7 @@ static inline unsigned long find_zero(unsigned long mask) */ static inline unsigned long load_unaligned_zeropad(const void *addr) { - unsigned long ret, offset; + unsigned long ret, tmp; /* Load word from unaligned pointer addr */ asm( @@ -61,9 +61,9 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) "2:\n" " .pushsection .fixup,\"ax\"\n" " .align 2\n" - "3: and %1, %2, #0x7\n" - " bic %2, %2, #0x7\n" - " ldr %0, [%2]\n" + "3: bic %1, %2, #0x7\n" + " ldr %0, [%1]\n" + " and %1, %2, #0x7\n" " lsl %1, %1, #0x3\n" #ifndef __AARCH64EB__ " lsr %0, %0, %1\n" @@ -73,7 +73,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) " b 2b\n" " .popsection\n" _ASM_EXTABLE(1b, 3b) - : "=&r" (ret), "=&r" (offset) + : "=&r" (ret), "=&r" (tmp) : "r" (addr), "Q" (*(unsigned long *)addr)); return ret; From 6eff5721933c08c3b76d6126aee24d8f134518ef Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 1 Apr 2021 07:33:14 -0700 Subject: [PATCH 035/182] cxl/mem: Use sysfs_emit() for attribute show routines While none the CXL sysfs attributes are threatening to overrun a PAGE_SIZE of output, it is good form to use the recommended helpers. Fixes: b39cb1052a5c ("cxl/mem: Register CXL memX devices") Reported-by: Jason Gunthorpe Reviewed-by: Ben Widawsky Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/161728759424.2474381.11231441014951343463.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 244cb7d89678..832582033683 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -1066,7 +1066,7 @@ static ssize_t firmware_version_show(struct device *dev, struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_mem *cxlm = cxlmd->cxlm; - return sprintf(buf, "%.16s\n", cxlm->firmware_version); + return sysfs_emit(buf, "%.16s\n", cxlm->firmware_version); } static DEVICE_ATTR_RO(firmware_version); @@ -1076,7 +1076,7 @@ static ssize_t payload_max_show(struct device *dev, struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_mem *cxlm = cxlmd->cxlm; - return sprintf(buf, "%zu\n", cxlm->payload_size); + return sysfs_emit(buf, "%zu\n", cxlm->payload_size); } static DEVICE_ATTR_RO(payload_max); @@ -1087,7 +1087,7 @@ static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr, struct cxl_mem *cxlm = cxlmd->cxlm; unsigned long long len = range_len(&cxlm->ram_range); - return sprintf(buf, "%#llx\n", len); + return sysfs_emit(buf, "%#llx\n", len); } static struct device_attribute dev_attr_ram_size = @@ -1100,7 +1100,7 @@ static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr, struct cxl_mem *cxlm = cxlmd->cxlm; unsigned long long len = range_len(&cxlm->pmem_range); - return sprintf(buf, "%#llx\n", len); + return sysfs_emit(buf, "%#llx\n", len); } static struct device_attribute dev_attr_pmem_size = From 5877515912cc4f0d67071b7cee15076ebef24708 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 1 Apr 2021 07:33:19 -0700 Subject: [PATCH 036/182] cxl/mem: Fix synchronization mechanism for device removal vs ioctl operations The percpu_ref to gate whether cxl_memdev_ioctl() is free to use the driver context (@cxlm) to issue I/O is overkill, implemented incorrectly (missing a device reference before accessing the percpu_ref), and the complexities of shutting down a percpu_ref contributed to a bug in the error unwind in cxl_mem_add_memdev() (missing put_device() to be fixed separately). Use an rwsem to explicitly synchronize the usage of cxlmd->cxlm, and add the missing reference counting for cxlmd in cxl_memdev_open() and cxl_memdev_release_file(). Fixes: b39cb1052a5c ("cxl/mem: Register CXL memX devices") Reported-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/161728759948.2474381.17481500816783671817.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 101 ++++++++++++++++++++++++---------------------- 1 file changed, 52 insertions(+), 49 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 832582033683..438f0861c46c 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -96,21 +96,18 @@ struct mbox_cmd { * @dev: driver core device object * @cdev: char dev core object for ioctl operations * @cxlm: pointer to the parent device driver data - * @ops_active: active user of @cxlm in ops handlers - * @ops_dead: completion when all @cxlm ops users have exited * @id: id number of this memdev instance. */ struct cxl_memdev { struct device dev; struct cdev cdev; struct cxl_mem *cxlm; - struct percpu_ref ops_active; - struct completion ops_dead; int id; }; static int cxl_mem_major; static DEFINE_IDA(cxl_memdev_ida); +static DECLARE_RWSEM(cxl_memdev_rwsem); static struct dentry *cxl_debugfs; static bool cxl_raw_allow_all; @@ -776,26 +773,43 @@ static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd, static long cxl_memdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct cxl_memdev *cxlmd; - struct inode *inode; - int rc = -ENOTTY; + struct cxl_memdev *cxlmd = file->private_data; + int rc = -ENXIO; - inode = file_inode(file); - cxlmd = container_of(inode->i_cdev, typeof(*cxlmd), cdev); - - if (!percpu_ref_tryget_live(&cxlmd->ops_active)) - return -ENXIO; - - rc = __cxl_memdev_ioctl(cxlmd, cmd, arg); - - percpu_ref_put(&cxlmd->ops_active); + down_read(&cxl_memdev_rwsem); + if (cxlmd->cxlm) + rc = __cxl_memdev_ioctl(cxlmd, cmd, arg); + up_read(&cxl_memdev_rwsem); return rc; } +static int cxl_memdev_open(struct inode *inode, struct file *file) +{ + struct cxl_memdev *cxlmd = + container_of(inode->i_cdev, typeof(*cxlmd), cdev); + + get_device(&cxlmd->dev); + file->private_data = cxlmd; + + return 0; +} + +static int cxl_memdev_release_file(struct inode *inode, struct file *file) +{ + struct cxl_memdev *cxlmd = + container_of(inode->i_cdev, typeof(*cxlmd), cdev); + + put_device(&cxlmd->dev); + + return 0; +} + static const struct file_operations cxl_memdev_fops = { .owner = THIS_MODULE, .unlocked_ioctl = cxl_memdev_ioctl, + .open = cxl_memdev_open, + .release = cxl_memdev_release_file, .compat_ioctl = compat_ptr_ioctl, .llseek = noop_llseek, }; @@ -1049,7 +1063,6 @@ static void cxl_memdev_release(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - percpu_ref_exit(&cxlmd->ops_active); ida_free(&cxl_memdev_ida, cxlmd->id); kfree(cxlmd); } @@ -1150,26 +1163,23 @@ static const struct device_type cxl_memdev_type = { .groups = cxl_memdev_attribute_groups, }; -static void cxlmdev_unregister(void *_cxlmd) +static void cxl_memdev_shutdown(struct cxl_memdev *cxlmd) +{ + down_write(&cxl_memdev_rwsem); + cxlmd->cxlm = NULL; + up_write(&cxl_memdev_rwsem); +} + +static void cxl_memdev_unregister(void *_cxlmd) { struct cxl_memdev *cxlmd = _cxlmd; struct device *dev = &cxlmd->dev; - percpu_ref_kill(&cxlmd->ops_active); cdev_device_del(&cxlmd->cdev, dev); - wait_for_completion(&cxlmd->ops_dead); - cxlmd->cxlm = NULL; + cxl_memdev_shutdown(cxlmd); put_device(dev); } -static void cxlmdev_ops_active_release(struct percpu_ref *ref) -{ - struct cxl_memdev *cxlmd = - container_of(ref, typeof(*cxlmd), ops_active); - - complete(&cxlmd->ops_dead); -} - static int cxl_mem_add_memdev(struct cxl_mem *cxlm) { struct pci_dev *pdev = cxlm->pdev; @@ -1181,17 +1191,6 @@ static int cxl_mem_add_memdev(struct cxl_mem *cxlm) cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL); if (!cxlmd) return -ENOMEM; - init_completion(&cxlmd->ops_dead); - - /* - * @cxlm is deallocated when the driver unbinds so operations - * that are using it need to hold a live reference. - */ - cxlmd->cxlm = cxlm; - rc = percpu_ref_init(&cxlmd->ops_active, cxlmdev_ops_active_release, 0, - GFP_KERNEL); - if (rc) - goto err_ref; rc = ida_alloc_range(&cxl_memdev_ida, 0, CXL_MEM_MAX_DEVS, GFP_KERNEL); if (rc < 0) @@ -1209,23 +1208,27 @@ static int cxl_mem_add_memdev(struct cxl_mem *cxlm) cdev = &cxlmd->cdev; cdev_init(cdev, &cxl_memdev_fops); + /* + * Activate ioctl operations, no cxl_memdev_rwsem manipulation + * needed as this is ordered with cdev_add() publishing the device. + */ + cxlmd->cxlm = cxlm; + rc = cdev_device_add(cdev, dev); if (rc) goto err_add; - return devm_add_action_or_reset(dev->parent, cxlmdev_unregister, cxlmd); + return devm_add_action_or_reset(dev->parent, cxl_memdev_unregister, + cxlmd); err_add: + /* + * The cdev was briefly live, shutdown any ioctl operations that + * saw that state. + */ + cxl_memdev_shutdown(cxlmd); ida_free(&cxl_memdev_ida, cxlmd->id); err_id: - /* - * Theoretically userspace could have already entered the fops, - * so flush ops_active. - */ - percpu_ref_kill(&cxlmd->ops_active); - wait_for_completion(&cxlmd->ops_dead); - percpu_ref_exit(&cxlmd->ops_active); -err_ref: kfree(cxlmd); return rc; From 1c3333a28d4532cfc37d4d25bfc76654a0c76643 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 1 Apr 2021 07:33:25 -0700 Subject: [PATCH 037/182] cxl/mem: Do not rely on device_add() side effects for dev_set_name() failures While device_add() will happen to catch dev_set_name() failures it is a broken pattern to follow given that the core may try to fall back to a different name. Add explicit checking for dev_set_name() failures to be cleaned up by put_device(). Skip cdev_device_add() and proceed directly to put_device() if the name set fails. This type of bug is easier to see if 'alloc' is split from 'add' operations that require put_device() on failure. So cxl_memdev_alloc() is split out as a result. Fixes: b39cb1052a5c ("cxl/mem: Register CXL memX devices") Reported-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/161728760514.2474381.1163928273337158134.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 438f0861c46c..da93b633531f 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -1180,7 +1180,7 @@ static void cxl_memdev_unregister(void *_cxlmd) put_device(dev); } -static int cxl_mem_add_memdev(struct cxl_mem *cxlm) +static struct cxl_memdev *cxl_memdev_alloc(struct cxl_mem *cxlm) { struct pci_dev *pdev = cxlm->pdev; struct cxl_memdev *cxlmd; @@ -1190,11 +1190,11 @@ static int cxl_mem_add_memdev(struct cxl_mem *cxlm) cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL); if (!cxlmd) - return -ENOMEM; + return ERR_PTR(-ENOMEM); rc = ida_alloc_range(&cxl_memdev_ida, 0, CXL_MEM_MAX_DEVS, GFP_KERNEL); if (rc < 0) - goto err_id; + goto err; cxlmd->id = rc; dev = &cxlmd->dev; @@ -1203,10 +1203,31 @@ static int cxl_mem_add_memdev(struct cxl_mem *cxlm) dev->bus = &cxl_bus_type; dev->devt = MKDEV(cxl_mem_major, cxlmd->id); dev->type = &cxl_memdev_type; - dev_set_name(dev, "mem%d", cxlmd->id); cdev = &cxlmd->cdev; cdev_init(cdev, &cxl_memdev_fops); + return cxlmd; + +err: + kfree(cxlmd); + return ERR_PTR(rc); +} + +static int cxl_mem_add_memdev(struct cxl_mem *cxlm) +{ + struct cxl_memdev *cxlmd; + struct device *dev; + struct cdev *cdev; + int rc; + + cxlmd = cxl_memdev_alloc(cxlm); + if (IS_ERR(cxlmd)) + return PTR_ERR(cxlmd); + + dev = &cxlmd->dev; + rc = dev_set_name(dev, "mem%d", cxlmd->id); + if (rc) + goto err; /* * Activate ioctl operations, no cxl_memdev_rwsem manipulation @@ -1214,23 +1235,21 @@ static int cxl_mem_add_memdev(struct cxl_mem *cxlm) */ cxlmd->cxlm = cxlm; + cdev = &cxlmd->cdev; rc = cdev_device_add(cdev, dev); if (rc) - goto err_add; + goto err; return devm_add_action_or_reset(dev->parent, cxl_memdev_unregister, cxlmd); -err_add: +err: /* * The cdev was briefly live, shutdown any ioctl operations that * saw that state. */ cxl_memdev_shutdown(cxlmd); - ida_free(&cxl_memdev_ida, cxlmd->id); -err_id: - kfree(cxlmd); - + put_device(dev); return rc; } From 7eda6457a9ca4dc9754e1158c3794e4487ea4392 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 1 Apr 2021 07:33:30 -0700 Subject: [PATCH 038/182] cxl/mem: Disable cxl device power management There is no power management of cxl virtual devices, disable device-power-management and runtime-power-management to prevent userspace from growing expectations of those attributes appearing. They can be added back in the future if needed. Reviewed-by: Ben Widawsky Link: https://lore.kernel.org/r/161728761025.2474381.808344500111924819.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index da93b633531f..52f7da49c560 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -1203,6 +1203,7 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_mem *cxlm) dev->bus = &cxl_bus_type; dev->devt = MKDEV(cxl_mem_major, cxlmd->id); dev->type = &cxl_memdev_type; + device_set_pm_not_required(dev); cdev = &cxlmd->cdev; cdev_init(cdev, &cxl_memdev_fops); From 392be0bda730df3c71241b2a16bbecac78ee627d Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 24 Mar 2021 15:16:35 +0100 Subject: [PATCH 039/182] cxl/mem: Force array size of mem_commands[] to CXL_MEM_COMMAND_ID_MAX Typically the mem_commands[] array is in sync with 'enum { CXL_CMDS }'. Current code works well. However, the array size of mem_commands[] may not strictly be the same as CXL_MEM_COMMAND_ID_MAX. E.g. if a new CXL_CMD() is added that is guarded by #ifdefs, the array could be shorter. This could lead then further to an out-of-bounds array access in cxl_validate_cmd_from_user(). Fix this by forcing the array size to CXL_MEM_COMMAND_ID_MAX. This also adds range checks for array items in mem_commands[] at compile time. Signed-off-by: Robert Richter Link: https://lore.kernel.org/r/20210324141635.22335-1-rrichter@amd.com Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 52f7da49c560..e3003f49b329 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -166,7 +166,7 @@ struct cxl_mem_command { * table will be validated against the user's input. For example, if size_in is * 0, and the user passed in 1, it is an error. */ -static struct cxl_mem_command mem_commands[] = { +static struct cxl_mem_command mem_commands[CXL_MEM_COMMAND_ID_MAX] = { CXL_CMD(IDENTIFY, 0, 0x43, CXL_CMD_FLAG_FORCE_ENABLE), #ifdef CONFIG_CXL_MEM_RAW_COMMANDS CXL_CMD(RAW, ~0, ~0, 0), From fbb9e86636ba8aa4d890091ab06cc7b2f4751322 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 4 Jan 2021 16:19:17 +0800 Subject: [PATCH 040/182] arm64: dts: allwinner: h6: Switch to macros for RSB clock/reset indices The macros for the clock and reset indices for the RSB hardware block were replaced with raw numbers when the RSB controller node was added. This was done to avoid cross-tree dependencies. Now that both the clk and DT changes have been merged, we can switch back to using the macros. Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi index 49e979794094..af8b7d0ef750 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi @@ -995,9 +995,9 @@ r_rsb: rsb@7083000 { compatible = "allwinner,sun8i-a23-rsb"; reg = <0x07083000 0x400>; interrupts = ; - clocks = <&r_ccu 13>; + clocks = <&r_ccu CLK_R_APB2_RSB>; clock-frequency = <3000000>; - resets = <&r_ccu 7>; + resets = <&r_ccu RST_R_APB2_RSB>; pinctrl-names = "default"; pinctrl-0 = <&r_rsb_pins>; status = "disabled"; From 3dd4ce4185df6798dcdcc3669bddb35899d7d5e1 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 16 Mar 2021 14:42:19 +0000 Subject: [PATCH 041/182] arm64: dts: allwinner: Fix SD card CD GPIO for SOPine systems Commit 941432d00768 ("arm64: dts: allwinner: Drop non-removable from SoPine/LTS SD card") enabled the card detect GPIO for the SOPine module, along the way with the Pine64-LTS, which share the same base .dtsi. However while both boards indeed have a working CD GPIO on PF6, the polarity is different: the SOPine modules uses a "push-pull" socket, which has an active-high switch, while the Pine64-LTS use the more traditional push-push socket and the common active-low switch. Fix the polarity in the sopine.dtsi, and overwrite it in the LTS board .dts, to make the SD card work again on systems using SOPine modules. Fixes: 941432d00768 ("arm64: dts: allwinner: Drop non-removable from SoPine/LTS SD card") Reported-by: Ashley Signed-off-by: Andre Przywara Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20210316144219.5973-1-andre.przywara@arm.com --- arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts | 4 ++++ arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts index 437ffe3628a5..e79ce49e7e6a 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts @@ -19,3 +19,7 @@ led { }; }; }; + +&mmc0 { + cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>; /* PF6 push-push switch */ +}; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi index 3402cec87035..df62044ff7a7 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi @@ -34,7 +34,7 @@ &mmc0 { vmmc-supply = <®_dcdc1>; disable-wp; bus-width = <4>; - cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>; /* PF6 */ + cd-gpios = <&pio 5 6 GPIO_ACTIVE_HIGH>; /* PF6 push-pull switch */ status = "okay"; }; From c89f3af3b5ba4d8b232c7b397e54dd228c49f6b1 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 31 Mar 2021 15:08:29 +0200 Subject: [PATCH 042/182] MAINTAINERS: Add our new mailing-list We've been struggling to get an LF-hosted mailing list for a while, but now that lists.linux.dev is there we opted in. Let's add it to MAINTAINERS. Signed-off-by: Maxime Ripard Acked-by: Chen-Yu Tsai Acked-by: Jernej Skrabec Link: https://lore.kernel.org/r/20210331130830.64182-1-maxime@cerno.tech --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index d92f85ca831d..d8c00df4045c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1576,6 +1576,7 @@ R: Jernej Skrabec L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/sunxi/linux.git +L: linux-sunxi@lists.linux.dev F: arch/arm/mach-sunxi/ F: arch/arm64/boot/dts/allwinner/ F: drivers/clk/sunxi-ng/ From 2a996ecd142df9ff2369241fd62ab7778ef07874 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 31 Mar 2021 15:08:30 +0200 Subject: [PATCH 043/182] MAINTAINERS: Match on allwinner keyword Some drivers (phy, crypto, net) folders don't have sunxi in it but allwinner. Add that keyword to match on it too. Signed-off-by: Maxime Ripard Acked-by: Chen-Yu Tsai Acked-by: Jernej Skrabec Link: https://lore.kernel.org/r/20210331130830.64182-2-maxime@cerno.tech --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index d8c00df4045c..5e808daae799 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1582,6 +1582,7 @@ F: arch/arm64/boot/dts/allwinner/ F: drivers/clk/sunxi-ng/ F: drivers/pinctrl/sunxi/ F: drivers/soc/sunxi/ +N: allwinner N: sun[x456789]i N: sun50i From 7a2f6e69e9c1060a7a09c1f8322ccb8d942b3078 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Tue, 30 Mar 2021 20:42:18 +0200 Subject: [PATCH 044/182] arm64: dts: allwinner: h6: beelink-gs1: Remove ext. 32 kHz osc reference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although every Beelink GS1 seems to have external 32768 Hz oscillator, it works only on one from four tested. There are more reports of RTC issues elsewhere, like Armbian forum. One Beelink GS1 owner read RTC osc status register on Android which shipped with the box. Reported value indicated problems with external oscillator. In order to fix RTC and related issues (HDMI-CEC and suspend/resume with Crust) on all boards, switch to internal oscillator. Fixes: 32507b868119 ("arm64: dts: allwinner: h6: Move ext. oscillator to board DTs") Signed-off-by: Jernej Skrabec Tested-by: Clément Péron Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20210330184218.279738-1-jernej.skrabec@siol.net --- arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts index 4f4755152fce..b5808047d6e4 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts @@ -289,10 +289,6 @@ &r_pio { vcc-pm-supply = <®_aldo1>; }; -&rtc { - clocks = <&ext_osc32k>; -}; - &spdif { status = "okay"; }; From 0dcf8febcb7b9d42bec98bc068e01d1a6ea578b8 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 6 Apr 2021 12:17:46 -0500 Subject: [PATCH 045/182] scsi: iscsi: Fix iSCSI cls conn state In commit 9e67600ed6b8 ("scsi: iscsi: Fix race condition between login and sync thread") I missed that libiscsi was now setting the iSCSI class state, and that patch ended up resetting the state during conn stoppage and using the wrong state value during ep_disconnect. This patch moves the setting of the class state to the class module and then fixes the two issues above. Link: https://lore.kernel.org/r/20210406171746.5016-1-michael.christie@oracle.com Fixes: 9e67600ed6b8 ("scsi: iscsi: Fix race condition between login and sync thread") Cc: Gulam Mohamed Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/libiscsi.c | 26 +++----------------------- drivers/scsi/scsi_transport_iscsi.c | 20 ++++++++++++++++---- 2 files changed, 19 insertions(+), 27 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 7ad11e42306d..bfd2aaa9b66b 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -3179,9 +3179,10 @@ fail_mgmt_tasks(struct iscsi_session *session, struct iscsi_conn *conn) } } -static void iscsi_start_session_recovery(struct iscsi_session *session, - struct iscsi_conn *conn, int flag) +void iscsi_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) { + struct iscsi_conn *conn = cls_conn->dd_data; + struct iscsi_session *session = conn->session; int old_stop_stage; mutex_lock(&session->eh_mutex); @@ -3239,27 +3240,6 @@ static void iscsi_start_session_recovery(struct iscsi_session *session, spin_unlock_bh(&session->frwd_lock); mutex_unlock(&session->eh_mutex); } - -void iscsi_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) -{ - struct iscsi_conn *conn = cls_conn->dd_data; - struct iscsi_session *session = conn->session; - - switch (flag) { - case STOP_CONN_RECOVER: - cls_conn->state = ISCSI_CONN_FAILED; - break; - case STOP_CONN_TERM: - cls_conn->state = ISCSI_CONN_DOWN; - break; - default: - iscsi_conn_printk(KERN_ERR, conn, - "invalid stop flag %d\n", flag); - return; - } - - iscsi_start_session_recovery(session, conn, flag); -} EXPORT_SYMBOL_GPL(iscsi_conn_stop); int iscsi_conn_bind(struct iscsi_cls_session *cls_session, diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index bebfb355abdf..21a2d997a72e 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2470,10 +2470,22 @@ static void iscsi_if_stop_conn(struct iscsi_cls_conn *conn, int flag) * it works. */ mutex_lock(&conn_mutex); - conn->transport->stop_conn(conn, flag); - conn->state = ISCSI_CONN_DOWN; - mutex_unlock(&conn_mutex); + switch (flag) { + case STOP_CONN_RECOVER: + conn->state = ISCSI_CONN_FAILED; + break; + case STOP_CONN_TERM: + conn->state = ISCSI_CONN_DOWN; + break; + default: + iscsi_cls_conn_printk(KERN_ERR, conn, + "invalid stop flag %d\n", flag); + goto unlock; + } + conn->transport->stop_conn(conn, flag); +unlock: + mutex_unlock(&conn_mutex); } static void stop_conn_work_fn(struct work_struct *work) @@ -2961,7 +2973,7 @@ static int iscsi_if_ep_disconnect(struct iscsi_transport *transport, mutex_lock(&conn->ep_mutex); conn->ep = NULL; mutex_unlock(&conn->ep_mutex); - conn->state = ISCSI_CONN_DOWN; + conn->state = ISCSI_CONN_FAILED; } transport->ep_disconnect(ep); From 176ddd89171ddcf661862d90c5d257877f7326d6 Mon Sep 17 00:00:00 2001 From: Jolly Shah Date: Thu, 18 Mar 2021 15:56:32 -0700 Subject: [PATCH 046/182] scsi: libsas: Reset num_scatter if libata marks qc as NODATA When the cache_type for the SCSI device is changed, the SCSI layer issues a MODE_SELECT command. The caching mode details are communicated via a request buffer associated with the SCSI command with data direction set as DMA_TO_DEVICE (scsi_mode_select()). When this command reaches the libata layer, as a part of generic initial setup, libata layer sets up the scatterlist for the command using the SCSI command (ata_scsi_qc_new()). This command is then translated by the libata layer into ATA_CMD_SET_FEATURES (ata_scsi_mode_select_xlat()). The libata layer treats this as a non-data command (ata_mselect_caching()), since it only needs an ATA taskfile to pass the caching on/off information to the device. It does not need the scatterlist that has been setup, so it does not perform dma_map_sg() on the scatterlist (ata_qc_issue()). Unfortunately, when this command reaches the libsas layer (sas_ata_qc_issue()), libsas layer sees it as a non-data command with a scatterlist. It cannot extract the correct DMA length since the scatterlist has not been mapped with dma_map_sg() for a DMA operation. When this partially constructed SAS task reaches pm80xx LLDD, it results in the following warning: "pm80xx_chip_sata_req 6058: The sg list address start_addr=0x0000000000000000 data_len=0x0end_addr_high=0xffffffff end_addr_low=0xffffffff has crossed 4G boundary" Update libsas to handle ATA non-data commands separately so num_scatter and total_xfer_len remain 0. Link: https://lore.kernel.org/r/20210318225632.2481291-1-jollys@google.com Fixes: 53de092f47ff ("scsi: libsas: Set data_dir as DMA_NONE if libata marks qc as NODATA") Tested-by: Luo Jiaxing Reviewed-by: John Garry Signed-off-by: Jolly Shah Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_ata.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 024e5a550759..8b9a39077dba 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -201,18 +201,17 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) memcpy(task->ata_task.atapi_packet, qc->cdb, qc->dev->cdb_len); task->total_xfer_len = qc->nbytes; task->num_scatter = qc->n_elem; + task->data_dir = qc->dma_dir; + } else if (qc->tf.protocol == ATA_PROT_NODATA) { + task->data_dir = DMA_NONE; } else { for_each_sg(qc->sg, sg, qc->n_elem, si) xfer += sg_dma_len(sg); task->total_xfer_len = xfer; task->num_scatter = si; - } - - if (qc->tf.protocol == ATA_PROT_NODATA) - task->data_dir = DMA_NONE; - else task->data_dir = qc->dma_dir; + } task->scatter = qc->sg; task->ata_task.retry_count = 1; task->task_state_flags = SAS_TASK_STATE_PENDING; From afd0be7299533bb2e2b09104399d8a467ecbd2c5 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Thu, 8 Apr 2021 05:20:09 +0000 Subject: [PATCH 047/182] libbpf: Fix potential NULL pointer dereference Wait until after the UMEM is checked for null to dereference it. Fixes: 43f1bc1efff1 ("libbpf: Restore umem state after socket create failure") Signed-off-by: Ciara Loftus Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210408052009.7844-1-ciara.loftus@intel.com --- tools/lib/bpf/xsk.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index d24b5cc720ec..007fe5d59438 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -852,18 +852,19 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, struct xsk_ring_cons *comp, const struct xsk_socket_config *usr_config) { + bool unmap, rx_setup_done = false, tx_setup_done = false; void *rx_map = NULL, *tx_map = NULL; struct sockaddr_xdp sxdp = {}; struct xdp_mmap_offsets off; struct xsk_socket *xsk; struct xsk_ctx *ctx; int err, ifindex; - bool unmap = umem->fill_save != fill; - bool rx_setup_done = false, tx_setup_done = false; if (!umem || !xsk_ptr || !(rx || tx)) return -EFAULT; + unmap = umem->fill_save != fill; + xsk = calloc(1, sizeof(*xsk)); if (!xsk) return -ENOMEM; From 2361db89aaadfb671db6911b0063e01ec8922c28 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 9 Mar 2021 17:43:38 -0800 Subject: [PATCH 048/182] libnvdimm: Notify disk drivers to revalidate region read-only Previous kernels allowed the BLKROSET to override the disk's read-only status. With that situation fixed the pmem driver needs to rely on notification events to reevaluate the disk read-only status after the host region has been marked read-write. Recall that when libnvdimm determines that the persistent memory has lost persistence (for example lack of energy to flush from DRAM to FLASH on an NVDIMM-N device) it marks the region read-only, but that state can be overridden by the user via: echo 0 > /sys/bus/nd/devices/regionX/read_only ...to date there is no notification that the region has restored persistence, so the user override is the only recovery. Fixes: 52f019d43c22 ("block: add a hard-readonly flag to struct gendisk") Reported-by: kernel test robot Reported-by: Vishal Verma Tested-by: Vishal Verma Reviewed-by: Christoph Hellwig Cc: Christoph Hellwig Cc: Ming Lei Cc: Martin K. Petersen Cc: Hannes Reinecke Cc: Jens Axboe Link: https://lore.kernel.org/r/161534060720.528671.2341213328968989192.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 14 ++++++-------- drivers/nvdimm/pmem.c | 37 ++++++++++++++++++++++++++++++++---- drivers/nvdimm/region_devs.c | 7 +++++++ include/linux/nd.h | 1 + 4 files changed, 47 insertions(+), 12 deletions(-) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 48f0985ca8a0..3a777d0073b7 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -631,16 +631,14 @@ void nvdimm_check_and_set_ro(struct gendisk *disk) struct nd_region *nd_region = to_nd_region(dev->parent); int disk_ro = get_disk_ro(disk); - /* - * Upgrade to read-only if the region is read-only preserve as - * read-only if the disk is already read-only. - */ - if (disk_ro || nd_region->ro == disk_ro) + /* catch the disk up with the region ro state */ + if (disk_ro == nd_region->ro) return; - dev_info(dev, "%s read-only, marking %s read-only\n", - dev_name(&nd_region->dev), disk->disk_name); - set_disk_ro(disk, 1); + dev_info(dev, "%s read-%s, marking %s read-%s\n", + dev_name(&nd_region->dev), nd_region->ro ? "only" : "write", + disk->disk_name, nd_region->ro ? "only" : "write"); + set_disk_ro(disk, nd_region->ro); } EXPORT_SYMBOL(nvdimm_check_and_set_ro); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index b8a85bfb2e95..7daac795db39 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -26,6 +26,7 @@ #include #include #include "pmem.h" +#include "btt.h" #include "pfn.h" #include "nd.h" @@ -585,7 +586,7 @@ static void nd_pmem_shutdown(struct device *dev) nvdimm_flush(to_nd_region(dev->parent), NULL); } -static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) +static void pmem_revalidate_poison(struct device *dev) { struct nd_region *nd_region; resource_size_t offset = 0, end_trunc = 0; @@ -595,9 +596,6 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) struct range range; struct kernfs_node *bb_state; - if (event != NVDIMM_REVALIDATE_POISON) - return; - if (is_nd_btt(dev)) { struct nd_btt *nd_btt = to_nd_btt(dev); @@ -635,6 +633,37 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) sysfs_notify_dirent(bb_state); } +static void pmem_revalidate_region(struct device *dev) +{ + struct pmem_device *pmem; + + if (is_nd_btt(dev)) { + struct nd_btt *nd_btt = to_nd_btt(dev); + struct btt *btt = nd_btt->btt; + + nvdimm_check_and_set_ro(btt->btt_disk); + return; + } + + pmem = dev_get_drvdata(dev); + nvdimm_check_and_set_ro(pmem->disk); +} + +static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) +{ + switch (event) { + case NVDIMM_REVALIDATE_POISON: + pmem_revalidate_poison(dev); + break; + case NVDIMM_REVALIDATE_REGION: + pmem_revalidate_region(dev); + break; + default: + dev_WARN_ONCE(dev, 1, "notify: unknown event: %d\n", event); + break; + } +} + MODULE_ALIAS("pmem"); MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO); MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index ef23119db574..51870eb51da6 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -518,6 +518,12 @@ static ssize_t read_only_show(struct device *dev, return sprintf(buf, "%d\n", nd_region->ro); } +static int revalidate_read_only(struct device *dev, void *data) +{ + nd_device_notify(dev, NVDIMM_REVALIDATE_REGION); + return 0; +} + static ssize_t read_only_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -529,6 +535,7 @@ static ssize_t read_only_store(struct device *dev, return rc; nd_region->ro = ro; + device_for_each_child(dev, NULL, revalidate_read_only); return len; } static DEVICE_ATTR_RW(read_only); diff --git a/include/linux/nd.h b/include/linux/nd.h index cec526c8043d..ee9ad76afbba 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -11,6 +11,7 @@ enum nvdimm_event { NVDIMM_REVALIDATE_POISON, + NVDIMM_REVALIDATE_REGION, }; enum nvdimm_claim_class { From a2948b17f6b936fc52f86c0f92c46d2f91928b79 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Fri, 2 Apr 2021 14:55:55 +0530 Subject: [PATCH 049/182] libnvdimm/region: Fix nvdimm_has_flush() to handle ND_REGION_ASYNC In case a platform doesn't provide explicit flush-hints but provides an explicit flush callback via ND_REGION_ASYNC region flag, then nvdimm_has_flush() still returns '0' indicating that writes do not require flushing. This happens on PPC64 with patch at [1] applied, where 'deep_flush' of a region was denied even though an explicit flush function was provided. Fix this by adding a condition to nvdimm_has_flush() to test for the ND_REGION_ASYNC flag on the region and see if a 'region->flush' callback is assigned. Link: http://lore.kernel.org/r/161703936121.36.7260632399582101498.stgit@e1fbed493c87 [1] Fixes: c5d4355d10d4 ("libnvdimm: nd_region flush callback support") Reported-by: Shivaprasad G Bhat Signed-off-by: Vaibhav Jain Link: https://lore.kernel.org/r/20210402092555.208590-1-vaibhav@linux.ibm.com Signed-off-by: Dan Williams --- drivers/nvdimm/region_devs.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 51870eb51da6..9ccf3d608799 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -1246,6 +1246,11 @@ int nvdimm_has_flush(struct nd_region *nd_region) || !IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API)) return -ENXIO; + /* Test if an explicit flush function is defined */ + if (test_bit(ND_REGION_ASYNC, &nd_region->flags) && nd_region->flush) + return 1; + + /* Test if any flush hints for the region are available */ for (i = 0; i < nd_region->ndr_mappings; i++) { struct nd_mapping *nd_mapping = &nd_region->mapping[i]; struct nvdimm *nvdimm = nd_mapping->nvdimm; @@ -1256,8 +1261,8 @@ int nvdimm_has_flush(struct nd_region *nd_region) } /* - * The platform defines dimm devices without hints, assume - * platform persistence mechanism like ADR + * The platform defines dimm devices without hints nor explicit flush, + * assume platform persistence mechanism like ADR */ return 0; } From b895bdf5d643b6feb7c60856326dd4feb6981560 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 9 Apr 2021 08:49:39 -0700 Subject: [PATCH 050/182] netfilter: nft_limit: avoid possible divide error in nft_limit_init div_u64() divides u64 by u32. nft_limit_init() wants to divide u64 by u64, use the appropriate math function (div64_u64) divide error: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 8390 Comm: syz-executor188 Not tainted 5.12.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:div_u64_rem include/linux/math64.h:28 [inline] RIP: 0010:div_u64 include/linux/math64.h:127 [inline] RIP: 0010:nft_limit_init+0x2a2/0x5e0 net/netfilter/nft_limit.c:85 Code: ef 4c 01 eb 41 0f 92 c7 48 89 de e8 38 a5 22 fa 4d 85 ff 0f 85 97 02 00 00 e8 ea 9e 22 fa 4c 0f af f3 45 89 ed 31 d2 4c 89 f0 <49> f7 f5 49 89 c6 e8 d3 9e 22 fa 48 8d 7d 48 48 b8 00 00 00 00 00 RSP: 0018:ffffc90009447198 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000200000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff875152e6 RDI: 0000000000000003 RBP: ffff888020f80908 R08: 0000200000000000 R09: 0000000000000000 R10: ffffffff875152d8 R11: 0000000000000000 R12: ffffc90009447270 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 000000000097a300(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000200001c4 CR3: 0000000026a52000 CR4: 00000000001506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: nf_tables_newexpr net/netfilter/nf_tables_api.c:2675 [inline] nft_expr_init+0x145/0x2d0 net/netfilter/nf_tables_api.c:2713 nft_set_elem_expr_alloc+0x27/0x280 net/netfilter/nf_tables_api.c:5160 nf_tables_newset+0x1997/0x3150 net/netfilter/nf_tables_api.c:4321 nfnetlink_rcv_batch+0x85a/0x21b0 net/netfilter/nfnetlink.c:456 nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:580 [inline] nfnetlink_rcv+0x3af/0x420 net/netfilter/nfnetlink.c:598 netlink_unicast_kernel net/netlink/af_netlink.c:1312 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1338 netlink_sendmsg+0x856/0xd90 net/netlink/af_netlink.c:1927 sock_sendmsg_nosec net/socket.c:654 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:674 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2350 ___sys_sendmsg+0xf3/0x170 net/socket.c:2404 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2433 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: c26844eda9d4 ("netfilter: nf_tables: Fix nft limit burst handling") Fixes: 3e0f64b7dd31 ("netfilter: nft_limit: fix packet ratelimiting") Signed-off-by: Eric Dumazet Diagnosed-by: Luigi Rizzo Reported-by: syzbot Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_limit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 0e2c315c3b5e..82ec27bdf941 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -76,13 +76,13 @@ static int nft_limit_init(struct nft_limit *limit, return -EOVERFLOW; if (pkts) { - tokens = div_u64(limit->nsecs, limit->rate) * limit->burst; + tokens = div64_u64(limit->nsecs, limit->rate) * limit->burst; } else { /* The token bucket size limits the number of tokens can be * accumulated. tokens_max specifies the bucket size. * tokens_max = unit * (rate + burst) / rate. */ - tokens = div_u64(limit->nsecs * (limit->rate + limit->burst), + tokens = div64_u64(limit->nsecs * (limit->rate + limit->burst), limit->rate); } From 7ee3c61dcd28bf6e290e06ad382f13511dc790e9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 7 Apr 2021 21:43:39 +0200 Subject: [PATCH 051/182] netfilter: bridge: add pre_exit hooks for ebtable unregistration Just like ip/ip6/arptables, the hooks have to be removed, then synchronize_rcu() has to be called to make sure no more packets are being processed before the ruleset data is released. Place the hook unregistration in the pre_exit hook, then call the new ebtables pre_exit function from there. Years ago, when first netns support got added for netfilter+ebtables, this used an older (now removed) netfilter hook unregister API, that did a unconditional synchronize_rcu(). Now that all is done with call_rcu, ebtable_{filter,nat,broute} pernet exit handlers may free the ebtable ruleset while packets are still in flight. This can only happens on module removal, not during netns exit. The new function expects the table name, not the table struct. This is because upcoming patch set (targeting -next) will remove all net->xt.{nat,filter,broute}_table instances, this makes it necessary to avoid external references to those member variables. The existing APIs will be converted, so follow the upcoming scheme of passing name + hook type instead. Fixes: aee12a0a3727e ("ebtables: remove nf_hook_register usage") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge/ebtables.h | 5 ++-- net/bridge/netfilter/ebtable_broute.c | 8 +++++- net/bridge/netfilter/ebtable_filter.c | 8 +++++- net/bridge/netfilter/ebtable_nat.c | 8 +++++- net/bridge/netfilter/ebtables.c | 30 ++++++++++++++++++++--- 5 files changed, 51 insertions(+), 8 deletions(-) diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 2f5c4e6ecd8a..3a956145a25c 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -110,8 +110,9 @@ extern int ebt_register_table(struct net *net, const struct ebt_table *table, const struct nf_hook_ops *ops, struct ebt_table **res); -extern void ebt_unregister_table(struct net *net, struct ebt_table *table, - const struct nf_hook_ops *); +extern void ebt_unregister_table(struct net *net, struct ebt_table *table); +void ebt_unregister_table_pre_exit(struct net *net, const char *tablename, + const struct nf_hook_ops *ops); extern unsigned int ebt_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct ebt_table *table); diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 66e7af165494..32bc2821027f 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -105,14 +105,20 @@ static int __net_init broute_net_init(struct net *net) &net->xt.broute_table); } +static void __net_exit broute_net_pre_exit(struct net *net) +{ + ebt_unregister_table_pre_exit(net, "broute", &ebt_ops_broute); +} + static void __net_exit broute_net_exit(struct net *net) { - ebt_unregister_table(net, net->xt.broute_table, &ebt_ops_broute); + ebt_unregister_table(net, net->xt.broute_table); } static struct pernet_operations broute_net_ops = { .init = broute_net_init, .exit = broute_net_exit, + .pre_exit = broute_net_pre_exit, }; static int __init ebtable_broute_init(void) diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 78cb9b21022d..bcf982e12f16 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -99,14 +99,20 @@ static int __net_init frame_filter_net_init(struct net *net) &net->xt.frame_filter); } +static void __net_exit frame_filter_net_pre_exit(struct net *net) +{ + ebt_unregister_table_pre_exit(net, "filter", ebt_ops_filter); +} + static void __net_exit frame_filter_net_exit(struct net *net) { - ebt_unregister_table(net, net->xt.frame_filter, ebt_ops_filter); + ebt_unregister_table(net, net->xt.frame_filter); } static struct pernet_operations frame_filter_net_ops = { .init = frame_filter_net_init, .exit = frame_filter_net_exit, + .pre_exit = frame_filter_net_pre_exit, }; static int __init ebtable_filter_init(void) diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 0888936ef853..0d092773f816 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -99,14 +99,20 @@ static int __net_init frame_nat_net_init(struct net *net) &net->xt.frame_nat); } +static void __net_exit frame_nat_net_pre_exit(struct net *net) +{ + ebt_unregister_table_pre_exit(net, "nat", ebt_ops_nat); +} + static void __net_exit frame_nat_net_exit(struct net *net) { - ebt_unregister_table(net, net->xt.frame_nat, ebt_ops_nat); + ebt_unregister_table(net, net->xt.frame_nat); } static struct pernet_operations frame_nat_net_ops = { .init = frame_nat_net_init, .exit = frame_nat_net_exit, + .pre_exit = frame_nat_net_pre_exit, }; static int __init ebtable_nat_init(void) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index ebe33b60efd6..d481ff24a150 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1232,10 +1232,34 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, return ret; } -void ebt_unregister_table(struct net *net, struct ebt_table *table, - const struct nf_hook_ops *ops) +static struct ebt_table *__ebt_find_table(struct net *net, const char *name) +{ + struct ebt_table *t; + + mutex_lock(&ebt_mutex); + + list_for_each_entry(t, &net->xt.tables[NFPROTO_BRIDGE], list) { + if (strcmp(t->name, name) == 0) { + mutex_unlock(&ebt_mutex); + return t; + } + } + + mutex_unlock(&ebt_mutex); + return NULL; +} + +void ebt_unregister_table_pre_exit(struct net *net, const char *name, const struct nf_hook_ops *ops) +{ + struct ebt_table *table = __ebt_find_table(net, name); + + if (table) + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); +} +EXPORT_SYMBOL(ebt_unregister_table_pre_exit); + +void ebt_unregister_table(struct net *net, struct ebt_table *table) { - nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); __ebt_unregister_table(net, table); } From d163a925ebbc6eb5b562b0f1d72c7e817aa75c40 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 7 Apr 2021 21:43:40 +0200 Subject: [PATCH 052/182] netfilter: arp_tables: add pre_exit hook for table unregister Same problem that also existed in iptables/ip(6)tables, when arptable_filter is removed there is no longer a wait period before the table/ruleset is free'd. Unregister the hook in pre_exit, then remove the table in the exit function. This used to work correctly because the old nf_hook_unregister API did unconditional synchronize_net. The per-net hook unregister function uses call_rcu instead. Fixes: b9e69e127397 ("netfilter: xtables: don't hook tables by default") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_arp/arp_tables.h | 5 +++-- net/ipv4/netfilter/arp_tables.c | 9 +++++++-- net/ipv4/netfilter/arptable_filter.c | 10 +++++++++- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 7d3537c40ec9..26a13294318c 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -52,8 +52,9 @@ extern void *arpt_alloc_initial_table(const struct xt_table *); int arpt_register_table(struct net *net, const struct xt_table *table, const struct arpt_replace *repl, const struct nf_hook_ops *ops, struct xt_table **res); -void arpt_unregister_table(struct net *net, struct xt_table *table, - const struct nf_hook_ops *ops); +void arpt_unregister_table(struct net *net, struct xt_table *table); +void arpt_unregister_table_pre_exit(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops); extern unsigned int arpt_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct xt_table *table); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index d1e04d2b5170..6c26533480dd 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1539,10 +1539,15 @@ int arpt_register_table(struct net *net, return ret; } -void arpt_unregister_table(struct net *net, struct xt_table *table, - const struct nf_hook_ops *ops) +void arpt_unregister_table_pre_exit(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops) { nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); +} +EXPORT_SYMBOL(arpt_unregister_table_pre_exit); + +void arpt_unregister_table(struct net *net, struct xt_table *table) +{ __arpt_unregister_table(net, table); } diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index c216b9ad3bb2..6c300ba5634e 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -56,16 +56,24 @@ static int __net_init arptable_filter_table_init(struct net *net) return err; } +static void __net_exit arptable_filter_net_pre_exit(struct net *net) +{ + if (net->ipv4.arptable_filter) + arpt_unregister_table_pre_exit(net, net->ipv4.arptable_filter, + arpfilter_ops); +} + static void __net_exit arptable_filter_net_exit(struct net *net) { if (!net->ipv4.arptable_filter) return; - arpt_unregister_table(net, net->ipv4.arptable_filter, arpfilter_ops); + arpt_unregister_table(net, net->ipv4.arptable_filter); net->ipv4.arptable_filter = NULL; } static struct pernet_operations arptable_filter_net_ops = { .exit = arptable_filter_net_exit, + .pre_exit = arptable_filter_net_pre_exit, }; static int __init arptable_filter_init(void) From 4af2178ac605faf32ebe638f7ac17d841d40ea9b Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Fri, 9 Apr 2021 17:11:45 +0800 Subject: [PATCH 053/182] MAINTAINERS: update maintainer entry for freescale fec driver Update maintainer entry for freescale fec driver. Suggested-by: Heiner Kallweit Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index ccd9228350cf..163264c282eb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7089,7 +7089,7 @@ S: Maintained F: drivers/i2c/busses/i2c-cpm.c FREESCALE IMX / MXC FEC DRIVER -M: Fugang Duan +M: Joakim Zhang L: netdev@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/fsl-fec.txt From 31457db3750c0b0ed229d836f2609fdb8a5b790e Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 11 Apr 2021 11:02:08 +0200 Subject: [PATCH 054/182] net: davicom: Fix regulator not turned off on failed probe When the probe fails, we must disable the regulator that was previously enabled. This patch is a follow-up to commit ac88c531a5b3 ("net: davicom: Fix regulator not turned off on failed probe") which missed one case. Fixes: 7994fe55a4a2 ("dm9000: Add regulator and reset support to dm9000") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/davicom/dm9000.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 252adfa5d837..8a9096aa85cd 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1471,8 +1471,10 @@ dm9000_probe(struct platform_device *pdev) /* Init network device */ ndev = alloc_etherdev(sizeof(struct board_info)); - if (!ndev) - return -ENOMEM; + if (!ndev) { + ret = -ENOMEM; + goto out_regulator_disable; + } SET_NETDEV_DEV(ndev, &pdev->dev); From 6628ddfec7580882f11fdc5c194a8ea781fdadfa Mon Sep 17 00:00:00 2001 From: Phillip Potter Date: Sun, 11 Apr 2021 12:28:24 +0100 Subject: [PATCH 055/182] net: geneve: check skb is large enough for IPv4/IPv6 header Check within geneve_xmit_skb/geneve6_xmit_skb that sk_buff structure is large enough to include IPv4 or IPv6 header, and reject if not. The geneve_xmit_skb portion and overall idea was contributed by Eric Dumazet. Fixes a KMSAN-found uninit-value bug reported by syzbot at: https://syzkaller.appspot.com/bug?id=abe95dc3e3e9667fc23b8d81f29ecad95c6f106f Suggested-by: Eric Dumazet Reported-by: syzbot+2e406a9ac75bb71d4b7a@syzkaller.appspotmail.com Signed-off-by: Phillip Potter Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/geneve.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index d5b1e48e0c09..42f31c681846 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -891,6 +891,9 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; + if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) + return -EINVAL; + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info, geneve->cfg.info.key.tp_dst, sport); @@ -985,6 +988,9 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; + if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) + return -EINVAL; + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info, geneve->cfg.info.key.tp_dst, sport); From ea941ac294d75d0ace50797aebf0056f6f8f7a7f Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 16 Feb 2021 17:13:42 -0700 Subject: [PATCH 056/182] dmaengine: idxd: Fix clobbering of SWERR overflow bit on writeback Current code blindly writes over the SWERR and the OVERFLOW bits. Write back the bits actually read instead so the driver avoids clobbering the OVERFLOW bit that comes after the register is read. Fixes: bfe1d56091c1 ("dmaengine: idxd: Init and probe for Intel data accelerators") Reported-by: Sanjay Kumar Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161352082229.3511254.1002151220537623503.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/irq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index a60ca11a5784..f1463fc58112 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -124,7 +124,9 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) for (i = 0; i < 4; i++) idxd->sw_err.bits[i] = ioread64(idxd->reg_base + IDXD_SWERR_OFFSET + i * sizeof(u64)); - iowrite64(IDXD_SWERR_ACK, idxd->reg_base + IDXD_SWERR_OFFSET); + + iowrite64(idxd->sw_err.bits[0] & IDXD_SWERR_ACK, + idxd->reg_base + IDXD_SWERR_OFFSET); if (idxd->sw_err.valid && idxd->sw_err.wq_idx_valid) { int id = idxd->sw_err.wq_idx; From 4ac823e9cd85f66da274c951d21bf9f6b714b729 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 22 Mar 2021 16:36:25 -0700 Subject: [PATCH 057/182] dmaengine: idxd: fix delta_rec and crc size field for completion record The delta_rec_size and crc_val in the completion record should be 32bits and not 16bits. Fixes: bfe1d56091c1 ("dmaengine: idxd: Init and probe for Intel data accelerators") Reported-by: Nikhil Rao Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161645618572.2003490.14466173451736323035.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 236d437947bc..e33997b4d750 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -247,8 +247,8 @@ struct dsa_completion_record { uint32_t rsvd2:8; }; - uint16_t delta_rec_size; - uint16_t crc_val; + uint32_t delta_rec_size; + uint32_t crc_val; /* DIF check & strip */ struct { From ea6a5735d2a61b938a302eb3629272342a9e7c46 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 22 Mar 2021 16:37:29 -0700 Subject: [PATCH 058/182] dmaengine: idxd: fix opcap sysfs attribute output The operation capability register is 256bits. The current output only prints out the first 64bits. Fix to output the entire 256bits. The current code omits operation caps from IAX devices. Fixes: c52ca478233c ("dmaengine: idxd: add configuration component of driver") Reported-by: Lucas Van Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161645624963.2003736.829798666998490151.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/sysfs.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 4dbb03c545e4..c27ca01cf8b2 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1449,8 +1449,14 @@ static ssize_t op_cap_show(struct device *dev, { struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); + int i, rc = 0; - return sprintf(buf, "%#llx\n", idxd->hw.opcap.bits[0]); + for (i = 0; i < 4; i++) + rc += sysfs_emit_at(buf, rc, "%#llx ", idxd->hw.opcap.bits[i]); + + rc--; + rc += sysfs_emit_at(buf, rc, "\n"); + return rc; } static DEVICE_ATTR_RO(op_cap); From 0fff71c5a311e1264988179f7dcc217fda15fadd Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 7 Apr 2021 12:59:47 -0700 Subject: [PATCH 059/182] dmaengine: idxd: fix wq size store permission state WQ size can only be changed when the device is disabled. Current code allows change when device is enabled but wq is disabled. Change the check to detect device state. Fixes: c52ca478233c ("dmaengine: idxd: add configuration component of driver") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161782558755.107710.18138252584838406025.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index c27ca01cf8b2..5f7bc4b1621a 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -989,7 +989,7 @@ static ssize_t wq_size_store(struct device *dev, if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) return -EPERM; - if (wq->state != IDXD_WQ_DISABLED) + if (idxd->state == IDXD_DEV_ENABLED) return -EPERM; if (size + total_claimed_wq_size(idxd) - wq->size > idxd->max_wq_size) From 88cd1d6191b13689094310c2405394e4ce36d061 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 24 Mar 2021 16:17:57 +0200 Subject: [PATCH 060/182] dmaengine: dw: Make it dependent to HAS_IOMEM Some architectures do not provide devm_*() APIs. Hence make the driver dependent on HAVE_IOMEM. Fixes: dbde5c2934d1 ("dw_dmac: use devm_* functions to simplify code") Reported-by: kernel test robot Signed-off-by: Andy Shevchenko Acked-by: Viresh Kumar Link: https://lore.kernel.org/r/20210324141757.24710-1-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul --- drivers/dma/dw/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/dw/Kconfig b/drivers/dma/dw/Kconfig index e5162690de8f..db25f9b7778c 100644 --- a/drivers/dma/dw/Kconfig +++ b/drivers/dma/dw/Kconfig @@ -10,6 +10,7 @@ config DW_DMAC_CORE config DW_DMAC tristate "Synopsys DesignWare AHB DMA platform driver" + depends on HAS_IOMEM select DW_DMAC_CORE help Support the Synopsys DesignWare AHB DMA controller. This @@ -18,6 +19,7 @@ config DW_DMAC config DW_DMAC_PCI tristate "Synopsys DesignWare AHB DMA PCI driver" depends on PCI + depends on HAS_IOMEM select DW_DMAC_CORE help Support the Synopsys DesignWare AHB DMA controller on the From ea45b6008f8095db0cc09ad6e03c7785c2986197 Mon Sep 17 00:00:00 2001 From: Lv Yunlong Date: Tue, 30 Mar 2021 18:44:58 -0700 Subject: [PATCH 061/182] dmaengine: Fix a double free in dma_async_device_register In the first list_for_each_entry() macro of dma_async_device_register, it gets the chan from list and calls __dma_async_device_channel_register (..,chan). We can see that chan->local is allocated by alloc_percpu() and it is freed chan->local by free_percpu(chan->local) when __dma_async_device_channel_register() failed. But after __dma_async_device_channel_register() failed, the caller will goto err_out and freed the chan->local in the second time by free_percpu(). The cause of this problem is forget to set chan->local to NULL when chan->local was freed in __dma_async_device_channel_register(). My patch sets chan->local to NULL when the callee failed to avoid double free. Fixes: d2fb0a0438384 ("dmaengine: break out channel registration") Signed-off-by: Lv Yunlong Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20210331014458.3944-1-lyl2019@mail.ustc.edu.cn Signed-off-by: Vinod Koul --- drivers/dma/dmaengine.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index fe6a460c4373..af3ee288bc11 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -1086,6 +1086,7 @@ static int __dma_async_device_channel_register(struct dma_device *device, kfree(chan->dev); err_free_local: free_percpu(chan->local); + chan->local = NULL; return rc; } From 917a3200b9f467a154999c7572af345f2470aaf4 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Fri, 9 Apr 2021 16:28:05 +0800 Subject: [PATCH 062/182] dmaengine: tegra20: Fix runtime PM imbalance on error pm_runtime_get_sync() will increase the runtime PM counter even it returns an error. Thus a pairing decrement is needed to prevent refcount leak. Fix this by replacing this API with pm_runtime_resume_and_get(), which will not change the runtime PM counter on error. Signed-off-by: Dinghao Liu Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20210409082805.23643-1-dinghao.liu@zju.edu.cn Signed-off-by: Vinod Koul --- drivers/dma/tegra20-apb-dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index 71827d9b0aa1..b7260749e8ee 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -723,7 +723,7 @@ static void tegra_dma_issue_pending(struct dma_chan *dc) goto end; } if (!tdc->busy) { - err = pm_runtime_get_sync(tdc->tdma->dev); + err = pm_runtime_resume_and_get(tdc->tdma->dev); if (err < 0) { dev_err(tdc2dev(tdc), "Failed to enable DMA\n"); goto end; @@ -818,7 +818,7 @@ static void tegra_dma_synchronize(struct dma_chan *dc) struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); int err; - err = pm_runtime_get_sync(tdc->tdma->dev); + err = pm_runtime_resume_and_get(tdc->tdma->dev); if (err < 0) { dev_err(tdc2dev(tdc), "Failed to synchronize DMA: %d\n", err); return; From 07503e6aefe4a6efd777062191944a14f03b3a18 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 23 Mar 2021 16:19:59 +0300 Subject: [PATCH 063/182] dmaengine: plx_dma: add a missing put_device() on error path Add a missing put_device(&pdev->dev) if the call to dma_async_device_register(dma); fails. Fixes: 905ca51e63be ("dmaengine: plx-dma: Introduce PLX DMA engine PCI driver skeleton") Signed-off-by: Dan Carpenter Reviewed-by: Logan Gunthorpe Link: https://lore.kernel.org/r/YFnq/0IQzixtAbC1@mwanda Signed-off-by: Vinod Koul --- drivers/dma/plx_dma.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/dma/plx_dma.c b/drivers/dma/plx_dma.c index f387c5bbc170..166934544161 100644 --- a/drivers/dma/plx_dma.c +++ b/drivers/dma/plx_dma.c @@ -507,10 +507,8 @@ static int plx_dma_create(struct pci_dev *pdev) rc = request_irq(pci_irq_vector(pdev, 0), plx_dma_isr, 0, KBUILD_MODNAME, plxdev); - if (rc) { - kfree(plxdev); - return rc; - } + if (rc) + goto free_plx; spin_lock_init(&plxdev->ring_lock); tasklet_setup(&plxdev->desc_task, plx_dma_desc_task); @@ -540,14 +538,20 @@ static int plx_dma_create(struct pci_dev *pdev) rc = dma_async_device_register(dma); if (rc) { pci_err(pdev, "Failed to register dma device: %d\n", rc); - free_irq(pci_irq_vector(pdev, 0), plxdev); - kfree(plxdev); - return rc; + goto put_device; } pci_set_drvdata(pdev, plxdev); return 0; + +put_device: + put_device(&pdev->dev); + free_irq(pci_irq_vector(pdev, 0), plxdev); +free_plx: + kfree(plxdev); + + return rc; } static int plx_dma_probe(struct pci_dev *pdev, From b74e409ea1b18128b877a50883d92a12eba83c33 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 9 Apr 2021 00:13:18 +0200 Subject: [PATCH 064/182] s390/entry: avoid setting up backchain in ext|io handlers Currently when interrupt arrives to cpu while in kernel context INT_HANDLER macro (used for ext_int_handler and io_int_handler) allocates new stack frame and pt_regs on the kernel stack and sets up the backchain to jump over the pt_regs to the frame which has been interrupted. This is not ideal to two reasons: 1. This hides the fact that kernel stack contains interrupt frame in it and hence breaks arch_stack_walk_reliable(), which needs to know that to guarantee "reliability" and checks that there are no pt_regs on the way. 2. It breaks the backchain unwinder logic, which assumes that the next stack frame after an interrupt frame is reliable, while it is not. In some cases (when r14 contains garbage) this leads to early unwinding termination with an error, instead of marking frame as unreliable and continuing. To address that, only set backchain to 0. Fixes: 56e62a737028 ("s390: convert to generic entry") Reviewed-by: Sven Schnelle Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/kernel/entry.S | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index c10b9f31eef7..235bf2ac3359 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -401,15 +401,13 @@ ENTRY(\name) brasl %r14,.Lcleanup_sie_int #endif 0: CHECK_STACK __LC_SAVE_AREA_ASYNC - lgr %r11,%r15 aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - stg %r11,__SF_BACKCHAIN(%r15) j 2f 1: BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP lctlg %c1,%c1,__LC_KERNEL_ASCE lg %r15,__LC_KERNEL_STACK - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) -2: la %r11,STACK_FRAME_OVERHEAD(%r15) +2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) # clear user controlled registers to prevent speculative use xgr %r0,%r0 From a994eddb947ea9ebb7b14d9a1267001699f0a136 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 9 Apr 2021 00:15:21 +0200 Subject: [PATCH 065/182] s390/entry: save the caller of psw_idle Currently psw_idle does not allocate a stack frame and does not save its r14 and r15 into the save area. Even though this is valid from call ABI point of view, because psw_idle does not make any calls explicitly, in reality psw_idle is an entry point for controlled transition into serving interrupts. So, in practice, psw_idle stack frame is analyzed during stack unwinding. Depending on build options that r14 slot in the save area of psw_idle might either contain a value saved by previous sibling call or complete garbage. [task 0000038000003c28] do_ext_irq+0xd6/0x160 [task 0000038000003c78] ext_int_handler+0xba/0xe8 [task *0000038000003dd8] psw_idle_exit+0x0/0x8 <-- pt_regs ([task 0000038000003dd8] 0x0) [task 0000038000003e10] default_idle_call+0x42/0x148 [task 0000038000003e30] do_idle+0xce/0x160 [task 0000038000003e70] cpu_startup_entry+0x36/0x40 [task 0000038000003ea0] arch_call_rest_init+0x76/0x80 So, to make a stacktrace nicer and actually point for the real caller of psw_idle in this frequently occurring case, make psw_idle save its r14. [task 0000038000003c28] do_ext_irq+0xd6/0x160 [task 0000038000003c78] ext_int_handler+0xba/0xe8 [task *0000038000003dd8] psw_idle_exit+0x0/0x6 <-- pt_regs ([task 0000038000003dd8] arch_cpu_idle+0x3c/0xd0) [task 0000038000003e10] default_idle_call+0x42/0x148 [task 0000038000003e30] do_idle+0xce/0x160 [task 0000038000003e70] cpu_startup_entry+0x36/0x40 [task 0000038000003ea0] arch_call_rest_init+0x76/0x80 Reviewed-by: Sven Schnelle Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/kernel/entry.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 235bf2ac3359..12de7a9c85b3 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -443,6 +443,7 @@ INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq * Load idle PSW. */ ENTRY(psw_idle) + stg %r14,(__SF_GPRS+8*8)(%r15) stg %r3,__SF_EMPTY(%r15) larl %r1,psw_idle_exit stg %r1,__SF_EMPTY+8(%r15) From 11664169981a025b7f6072d136ac724294b7b65c Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 18 Mar 2021 13:02:02 -0400 Subject: [PATCH 066/182] drm/i915/dpcd_bl: Don't try vesa interface unless specified by VBT Looks like that there actually are another subset of laptops on the market that don't support the Intel HDR backlight interface, but do advertise support for the VESA DPCD backlight interface despite the fact it doesn't seem to work. Note though I'm not entirely clear on this - on one of the machines where this issue was observed, I also noticed that we appeared to be rejecting the VBT defined backlight frequency in intel_dp_aux_vesa_calc_max_backlight(). It's noted in this function that: /* Use highest possible value of Pn for more granularity of brightness * adjustment while satifying the conditions below. * ... * - FxP is within 25% of desired value. * Note: 25% is arbitrary value and may need some tweak. */ So it's possible that this value might just need to be tweaked, but for now let's just disable the VESA backlight interface unless it's specified in the VBT just to be safe. We might be able to try enabling this again by default in the future. Fixes: 2227816e647a ("drm/i915/dp: Allow forcing specific interfaces through enable_dpcd_backlight") Cc: Jani Nikula Cc: Rodrigo Vivi Bugzilla: https://gitlab.freedesktop.org/drm/intel/-/issues/3169 Signed-off-by: Lyude Paul Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210318170204.513000-1-lyude@redhat.com (cherry picked from commit 9e2eb6d5380e9dadcd2baecb51f238e5eba94bee) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 651884390137..4f8337c7fd2e 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -646,7 +646,6 @@ int intel_dp_aux_init_backlight_funcs(struct intel_connector *connector) break; case INTEL_BACKLIGHT_DISPLAY_DDI: try_intel_interface = true; - try_vesa_interface = true; break; default: return -ENODEV; From bf52dc49ba0101f648b4c3ea26b812061406b0d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 27 Mar 2021 02:59:45 +0200 Subject: [PATCH 067/182] drm/i915: Don't zero out the Y plane's watermarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't zero out the watermarks for the Y plane since we've already computed them when computing the UV plane's watermarks (since the UV plane always appears before ethe Y plane when iterating through the planes). This leads to allocating no DDB for the Y plane since .min_ddb_alloc also gets zeroed. And that of course leads to underruns when scanning out planar formats. Cc: stable@vger.kernel.org Cc: Stanislav Lisovskiy Fixes: dbf71381d733 ("drm/i915: Nuke intel_atomic_crtc_state_for_each_plane_state() from skl+ wm code") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210327005945.4929-1-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy (cherry picked from commit f99b805fb9413ff007ca0b6add871737664117dd) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 97b57acc02e2..4b4d8d034782 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5471,12 +5471,12 @@ static int icl_build_plane_wm(struct intel_crtc_state *crtc_state, struct skl_plane_wm *wm = &crtc_state->wm.skl.raw.planes[plane_id]; int ret; - memset(wm, 0, sizeof(*wm)); - /* Watermarks calculated in master */ if (plane_state->planar_slave) return 0; + memset(wm, 0, sizeof(*wm)); + if (plane_state->planar_linked_plane) { const struct drm_framebuffer *fb = plane_state->hw.fb; enum plane_id y_plane_id = plane_state->planar_linked_plane->id; From aee6f25e9c911323aa89a200e1bb160c1613ed3d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 25 Mar 2021 12:48:22 +0100 Subject: [PATCH 068/182] drm/i915/display/vlv_dsi: Do not skip panel_pwr_cycle_delay when disabling the panel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the recently added commit fe0f1e3bfdfe ("drm/i915: Shut down displays gracefully on reboot"), the DSI panel on a Cherry Trail based Predia Basic tablet would no longer properly light up after reboot. I've managed to reproduce this without rebooting by doing: chvt 3; echo 1 > /sys/class/graphics/fb0/blank;\ echo 0 > /sys/class/graphics/fb0/blank Which rapidly turns the panel off and back on again. The vlv_dsi.c code uses an intel_dsi_msleep() helper for the various delays used for panel on/off, since starting with MIPI-sequences version >= 3 the delays are already included inside the MIPI-sequences. The problems exposed by the "Shut down displays gracefully on reboot" change, show that using this helper for the panel_pwr_cycle_delay is not the right thing to do. This has not been noticed until now because normally the panel never is cycled off and directly on again in quick succession. Change the msleep for the panel_pwr_cycle_delay to a normal msleep() call to avoid the panel staying black after a quick off + on cycle. Cc: Ville Syrjälä Fixes: fe0f1e3bfdfe ("drm/i915: Shut down displays gracefully on reboot") Signed-off-by: Hans de Goede Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210325114823.44922-1-hdegoede@redhat.com (cherry picked from commit 2878b29fc25a0dac0e1c6c94177f07c7f94240f0) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/vlv_dsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index f94025ec603a..a9a8ba1d3aba 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -992,14 +992,14 @@ static void intel_dsi_post_disable(struct intel_atomic_state *state, * FIXME As we do with eDP, just make a note of the time here * and perform the wait before the next panel power on. */ - intel_dsi_msleep(intel_dsi, intel_dsi->panel_pwr_cycle_delay); + msleep(intel_dsi->panel_pwr_cycle_delay); } static void intel_dsi_shutdown(struct intel_encoder *encoder) { struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); - intel_dsi_msleep(intel_dsi, intel_dsi->panel_pwr_cycle_delay); + msleep(intel_dsi->panel_pwr_cycle_delay); } static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, From 2decad92f4731fac9755a083fcfefa66edb7d67d Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 9 Apr 2021 18:37:10 +0100 Subject: [PATCH 069/182] arm64: mte: Ensure TIF_MTE_ASYNC_FAULT is set atomically The entry from EL0 code checks the TFSRE0_EL1 register for any asynchronous tag check faults in user space and sets the TIF_MTE_ASYNC_FAULT flag. This is not done atomically, potentially racing with another CPU calling set_tsk_thread_flag(). Replace the non-atomic ORR+STR with an STSET instruction. While STSET requires ARMv8.1 and an assembler that understands LSE atomics, the MTE feature is part of ARMv8.5 and already requires an updated assembler. Signed-off-by: Catalin Marinas Fixes: 637ec831ea4f ("arm64: mte: Handle synchronous and asynchronous tag check faults") Cc: # 5.10.x Reported-by: Will Deacon Cc: Will Deacon Cc: Vincenzo Frascino Cc: Mark Rutland Link: https://lore.kernel.org/r/20210409173710.18582-1-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 6 +++++- arch/arm64/kernel/entry.S | 10 ++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e4e1b6550115..dfdc3e0af5e1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1406,10 +1406,13 @@ config ARM64_PAN config AS_HAS_LDAPR def_bool $(as-instr,.arch_extension rcpc) +config AS_HAS_LSE_ATOMICS + def_bool $(as-instr,.arch_extension lse) + config ARM64_LSE_ATOMICS bool default ARM64_USE_LSE_ATOMICS - depends on $(as-instr,.arch_extension lse) + depends on AS_HAS_LSE_ATOMICS config ARM64_USE_LSE_ATOMICS bool "Atomic instructions" @@ -1666,6 +1669,7 @@ config ARM64_MTE default y depends on ARM64_AS_HAS_MTE && ARM64_TAGGED_ADDR_ABI depends on AS_HAS_ARMV8_5 + depends on AS_HAS_LSE_ATOMICS # Required for tag checking in the uaccess routines depends on ARM64_PAN select ARCH_USES_HIGH_VMA_FLAGS diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a31a0a713c85..6acfc5e6b5e0 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -148,16 +148,18 @@ alternative_cb_end .endm /* Check for MTE asynchronous tag check faults */ - .macro check_mte_async_tcf, flgs, tmp + .macro check_mte_async_tcf, tmp, ti_flags #ifdef CONFIG_ARM64_MTE + .arch_extension lse alternative_if_not ARM64_MTE b 1f alternative_else_nop_endif mrs_s \tmp, SYS_TFSRE0_EL1 tbz \tmp, #SYS_TFSR_EL1_TF0_SHIFT, 1f /* Asynchronous TCF occurred for TTBR0 access, set the TI flag */ - orr \flgs, \flgs, #_TIF_MTE_ASYNC_FAULT - str \flgs, [tsk, #TSK_TI_FLAGS] + mov \tmp, #_TIF_MTE_ASYNC_FAULT + add \ti_flags, tsk, #TSK_TI_FLAGS + stset \tmp, [\ti_flags] msr_s SYS_TFSRE0_EL1, xzr 1: #endif @@ -244,7 +246,7 @@ alternative_else_nop_endif disable_step_tsk x19, x20 /* Check for asynchronous tag check faults in user space */ - check_mte_async_tcf x19, x22 + check_mte_async_tcf x22, x23 apply_ssbd 1, x22, x23 ptrauth_keys_install_kernel tsk, x20, x22, x23 From 6df0e6c57dfc064af330071f372f11aa8c584997 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 12 Apr 2021 09:23:27 -0700 Subject: [PATCH 070/182] dmaengine: idxd: clear MSIX permission entry on shutdown Add disabling/clearing of MSIX permission entries on device shutdown to mirror the enabling of the MSIX entries on probe. Current code left the MSIX enabled and the pasid entries still programmed at device shutdown. Fixes: 8e50d392652f ("dmaengine: idxd: Add shared workqueue support") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161824457969.882533.6020239898682672311.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 30 ++++++++++++++++++++++++++++++ drivers/dma/idxd/idxd.h | 2 ++ drivers/dma/idxd/init.c | 11 ++--------- 3 files changed, 34 insertions(+), 9 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 84a6ea60ecf0..c09687013d29 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -574,6 +574,36 @@ void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid) } /* Device configuration bits */ +void idxd_msix_perm_setup(struct idxd_device *idxd) +{ + union msix_perm mperm; + int i, msixcnt; + + msixcnt = pci_msix_vec_count(idxd->pdev); + if (msixcnt < 0) + return; + + mperm.bits = 0; + mperm.pasid = idxd->pasid; + mperm.pasid_en = device_pasid_enabled(idxd); + for (i = 1; i < msixcnt; i++) + iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8); +} + +void idxd_msix_perm_clear(struct idxd_device *idxd) +{ + union msix_perm mperm; + int i, msixcnt; + + msixcnt = pci_msix_vec_count(idxd->pdev); + if (msixcnt < 0) + return; + + mperm.bits = 0; + for (i = 1; i < msixcnt; i++) + iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8); +} + static void idxd_group_config_write(struct idxd_group *group) { struct idxd_device *idxd = group->idxd; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 81a0e65fd316..eda2ee10501f 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -316,6 +316,8 @@ void idxd_unregister_driver(void); struct bus_type *idxd_get_bus_type(struct idxd_device *idxd); /* device interrupt control */ +void idxd_msix_perm_setup(struct idxd_device *idxd); +void idxd_msix_perm_clear(struct idxd_device *idxd); irqreturn_t idxd_irq_handler(int vec, void *data); irqreturn_t idxd_misc_thread(int vec, void *data); irqreturn_t idxd_wq_thread(int irq, void *data); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 085a0c3b62c6..6584b0ec07d5 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -65,7 +65,6 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) struct idxd_irq_entry *irq_entry; int i, msixcnt; int rc = 0; - union msix_perm mperm; msixcnt = pci_msix_vec_count(pdev); if (msixcnt < 0) { @@ -144,14 +143,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) } idxd_unmask_error_interrupts(idxd); - - /* Setup MSIX permission table */ - mperm.bits = 0; - mperm.pasid = idxd->pasid; - mperm.pasid_en = device_pasid_enabled(idxd); - for (i = 1; i < msixcnt; i++) - iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8); - + idxd_msix_perm_setup(idxd); return 0; err_no_irq: @@ -510,6 +502,7 @@ static void idxd_shutdown(struct pci_dev *pdev) idxd_flush_work_list(irq_entry); } + idxd_msix_perm_clear(idxd); destroy_workqueue(idxd->wq); } From ea9aadc06a9f10ad20a90edc0a484f1147d88a7a Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 12 Apr 2021 09:02:36 -0700 Subject: [PATCH 071/182] dmaengine: idxd: fix wq cleanup of WQCFG registers A pre-release silicon erratum workaround where wq reset does not clear WQCFG registers was leaked into upstream code. Use wq reset command instead of blasting the MMIO region. This also address an issue where we clobber registers in future devices. Fixes: da32b28c95a7 ("dmaengine: idxd: cleanup workqueue config after disabling") Reported-by: Shreenivaas Devarajan Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161824330020.881560.16375921906426627033.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 35 ++++++++++++++++++++++++----------- drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/sysfs.c | 9 ++------- 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index c09687013d29..31c819544a22 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -282,6 +282,22 @@ void idxd_wq_drain(struct idxd_wq *wq) idxd_cmd_exec(idxd, IDXD_CMD_DRAIN_WQ, operand, NULL); } +void idxd_wq_reset(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + u32 operand; + + if (wq->state != IDXD_WQ_ENABLED) { + dev_dbg(dev, "WQ %d in wrong state: %d\n", wq->id, wq->state); + return; + } + + operand = BIT(wq->id % 16) | ((wq->id / 16) << 16); + idxd_cmd_exec(idxd, IDXD_CMD_RESET_WQ, operand, NULL); + wq->state = IDXD_WQ_DISABLED; +} + int idxd_wq_map_portal(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; @@ -363,8 +379,6 @@ int idxd_wq_disable_pasid(struct idxd_wq *wq) void idxd_wq_disable_cleanup(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; - struct device *dev = &idxd->pdev->dev; - int i, wq_offset; lockdep_assert_held(&idxd->dev_lock); memset(wq->wqcfg, 0, idxd->wqcfg_size); @@ -376,14 +390,6 @@ void idxd_wq_disable_cleanup(struct idxd_wq *wq) wq->ats_dis = 0; clear_bit(WQ_FLAG_DEDICATED, &wq->flags); memset(wq->name, 0, WQ_NAME_SIZE); - - for (i = 0; i < WQCFG_STRIDES(idxd); i++) { - wq_offset = WQCFG_OFFSET(idxd, wq->id, i); - iowrite32(0, idxd->reg_base + wq_offset); - dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n", - wq->id, i, wq_offset, - ioread32(idxd->reg_base + wq_offset)); - } } /* Device control bits */ @@ -672,7 +678,14 @@ static int idxd_wq_config_write(struct idxd_wq *wq) if (!wq->group) return 0; - memset(wq->wqcfg, 0, idxd->wqcfg_size); + /* + * Instead of memset the entire shadow copy of WQCFG, copy from the hardware after + * wq reset. This will copy back the sticky values that are present on some devices. + */ + for (i = 0; i < WQCFG_STRIDES(idxd); i++) { + wq_offset = WQCFG_OFFSET(idxd, wq->id, i); + wq->wqcfg->bits[i] = ioread32(idxd->reg_base + wq_offset); + } /* byte 0-3 */ wq->wqcfg->wq_size = wq->size; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index eda2ee10501f..76014c14f473 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -343,6 +343,7 @@ void idxd_wq_free_resources(struct idxd_wq *wq); int idxd_wq_enable(struct idxd_wq *wq); int idxd_wq_disable(struct idxd_wq *wq); void idxd_wq_drain(struct idxd_wq *wq); +void idxd_wq_reset(struct idxd_wq *wq); int idxd_wq_map_portal(struct idxd_wq *wq); void idxd_wq_unmap_portal(struct idxd_wq *wq); void idxd_wq_disable_cleanup(struct idxd_wq *wq); diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 5f7bc4b1621a..18bf4d148989 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -275,7 +275,6 @@ static void disable_wq(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; - int rc; mutex_lock(&wq->wq_lock); dev_dbg(dev, "%s removing WQ %s\n", __func__, dev_name(&wq->conf_dev)); @@ -296,17 +295,13 @@ static void disable_wq(struct idxd_wq *wq) idxd_wq_unmap_portal(wq); idxd_wq_drain(wq); - rc = idxd_wq_disable(wq); + idxd_wq_reset(wq); idxd_wq_free_resources(wq); wq->client_count = 0; mutex_unlock(&wq->wq_lock); - if (rc < 0) - dev_warn(dev, "Failed to disable %s: %d\n", - dev_name(&wq->conf_dev), rc); - else - dev_info(dev, "wq %s disabled\n", dev_name(&wq->conf_dev)); + dev_info(dev, "wq %s disabled\n", dev_name(&wq->conf_dev)); } static int idxd_config_bus_remove(struct device *dev) From 1fe976d308acb6374c899a4ee8025a0a016e453e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 12 Apr 2021 18:57:39 +0200 Subject: [PATCH 072/182] net: phy: marvell: fix detection of PHY on Topaz switches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit fee2d546414d ("net: phy: marvell: mv88e6390 temperature sensor reading"), Linux reports the temperature of Topaz hwmon as constant -75°C. This is because switches from the Topaz family (88E6141 / 88E6341) have the address of the temperature sensor register different from Peridot. This address is instead compatible with 88E1510 PHYs, as was used for Topaz before the above mentioned commit. Create a new mapping table between switch family and PHY ID for families which don't have a model number. And define PHY IDs for Topaz and Peridot families. Create a new PHY ID and a new PHY driver for Topaz's internal PHY. The only difference from Peridot's PHY driver is the HWMON probing method. Prior this change Topaz's internal PHY is detected by kernel as: PHY [...] driver [Marvell 88E6390] (irq=63) And afterwards as: PHY [...] driver [Marvell 88E6341 Family] (irq=63) Signed-off-by: Pali Rohár BugLink: https://github.com/globalscaletechnologies/linux/issues/1 Fixes: fee2d546414d ("net: phy: marvell: mv88e6390 temperature sensor reading") Reviewed-by: Marek Behún Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 30 +++++++++++++----------------- drivers/net/phy/marvell.c | 32 +++++++++++++++++++++++++++++--- include/linux/marvell_phy.h | 5 +++-- 3 files changed, 45 insertions(+), 22 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 903d619e08ed..e08bf9377140 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3026,10 +3026,17 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) return err; } +/* prod_id for switch families which do not have a PHY model number */ +static const u16 family_prod_id_table[] = { + [MV88E6XXX_FAMILY_6341] = MV88E6XXX_PORT_SWITCH_ID_PROD_6341, + [MV88E6XXX_FAMILY_6390] = MV88E6XXX_PORT_SWITCH_ID_PROD_6390, +}; + static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg) { struct mv88e6xxx_mdio_bus *mdio_bus = bus->priv; struct mv88e6xxx_chip *chip = mdio_bus->chip; + u16 prod_id; u16 val; int err; @@ -3040,23 +3047,12 @@ static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg) err = chip->info->ops->phy_read(chip, bus, phy, reg, &val); mv88e6xxx_reg_unlock(chip); - if (reg == MII_PHYSID2) { - /* Some internal PHYs don't have a model number. */ - if (chip->info->family != MV88E6XXX_FAMILY_6165) - /* Then there is the 6165 family. It gets is - * PHYs correct. But it can also have two - * SERDES interfaces in the PHY address - * space. And these don't have a model - * number. But they are not PHYs, so we don't - * want to give them something a PHY driver - * will recognise. - * - * Use the mv88e6390 family model number - * instead, for anything which really could be - * a PHY, - */ - if (!(val & 0x3f0)) - val |= MV88E6XXX_PORT_SWITCH_ID_PROD_6390 >> 4; + /* Some internal PHYs don't have a model number. */ + if (reg == MII_PHYSID2 && !(val & 0x3f0) && + chip->info->family < ARRAY_SIZE(family_prod_id_table)) { + prod_id = family_prod_id_table[chip->info->family]; + if (prod_id) + val |= prod_id >> 4; } return err ? err : val; diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index e26a5d663f8a..8018ddf7f316 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -3021,9 +3021,34 @@ static struct phy_driver marvell_drivers[] = { .get_stats = marvell_get_stats, }, { - .phy_id = MARVELL_PHY_ID_88E6390, + .phy_id = MARVELL_PHY_ID_88E6341_FAMILY, .phy_id_mask = MARVELL_PHY_ID_MASK, - .name = "Marvell 88E6390", + .name = "Marvell 88E6341 Family", + /* PHY_GBIT_FEATURES */ + .flags = PHY_POLL_CABLE_TEST, + .probe = m88e1510_probe, + .config_init = marvell_config_init, + .config_aneg = m88e6390_config_aneg, + .read_status = marvell_read_status, + .config_intr = marvell_config_intr, + .handle_interrupt = marvell_handle_interrupt, + .resume = genphy_resume, + .suspend = genphy_suspend, + .read_page = marvell_read_page, + .write_page = marvell_write_page, + .get_sset_count = marvell_get_sset_count, + .get_strings = marvell_get_strings, + .get_stats = marvell_get_stats, + .get_tunable = m88e1540_get_tunable, + .set_tunable = m88e1540_set_tunable, + .cable_test_start = marvell_vct7_cable_test_start, + .cable_test_tdr_start = marvell_vct5_cable_test_tdr_start, + .cable_test_get_status = marvell_vct7_cable_test_get_status, + }, + { + .phy_id = MARVELL_PHY_ID_88E6390_FAMILY, + .phy_id_mask = MARVELL_PHY_ID_MASK, + .name = "Marvell 88E6390 Family", /* PHY_GBIT_FEATURES */ .flags = PHY_POLL_CABLE_TEST, .probe = m88e6390_probe, @@ -3107,7 +3132,8 @@ static struct mdio_device_id __maybe_unused marvell_tbl[] = { { MARVELL_PHY_ID_88E1540, MARVELL_PHY_ID_MASK }, { MARVELL_PHY_ID_88E1545, MARVELL_PHY_ID_MASK }, { MARVELL_PHY_ID_88E3016, MARVELL_PHY_ID_MASK }, - { MARVELL_PHY_ID_88E6390, MARVELL_PHY_ID_MASK }, + { MARVELL_PHY_ID_88E6341_FAMILY, MARVELL_PHY_ID_MASK }, + { MARVELL_PHY_ID_88E6390_FAMILY, MARVELL_PHY_ID_MASK }, { MARVELL_PHY_ID_88E1340S, MARVELL_PHY_ID_MASK }, { MARVELL_PHY_ID_88E1548P, MARVELL_PHY_ID_MASK }, { } diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index 52b1610eae68..c544b70dfbd2 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -28,11 +28,12 @@ /* Marvel 88E1111 in Finisar SFP module with modified PHY ID */ #define MARVELL_PHY_ID_88E1111_FINISAR 0x01ff0cc0 -/* The MV88e6390 Ethernet switch contains embedded PHYs. These PHYs do +/* These Ethernet switch families contain embedded PHYs, but they do * not have a model ID. So the switch driver traps reads to the ID2 * register and returns the switch family ID */ -#define MARVELL_PHY_ID_88E6390 0x01410f90 +#define MARVELL_PHY_ID_88E6341_FAMILY 0x01410f41 +#define MARVELL_PHY_ID_88E6390_FAMILY 0x01410f90 #define MARVELL_PHY_FAMILY_ID(id) ((id) >> 4) From f33b0e196ed7aa3dc285b26db7768c1db1eb3a41 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 12 Apr 2021 11:47:07 -0700 Subject: [PATCH 073/182] ethtool: fix kdoc attr name Add missing 't' in attrtype. Signed-off-by: Jakub Kicinski Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/ethtool/netlink.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 6eabd58d81bf..cde9f3169ae5 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -36,9 +36,9 @@ static inline int ethnl_strz_size(const char *s) /** * ethnl_put_strz() - put string attribute with fixed size string - * @skb: skb with the message - * @attrype: attribute type - * @s: ETH_GSTRING_LEN sized string (may not be null terminated) + * @skb: skb with the message + * @attrtype: attribute type + * @s: ETH_GSTRING_LEN sized string (may not be null terminated) * * Puts an attribute with null terminated string from @s into the message. * From b29c457a6511435960115c0f548c4360d5f4801d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 7 Apr 2021 21:38:57 +0200 Subject: [PATCH 074/182] netfilter: x_tables: fix compat match/target pad out-of-bound write xt_compat_match/target_from_user doesn't check that zeroing the area to start of next rule won't write past end of allocated ruleset blob. Remove this code and zero the entire blob beforehand. Reported-by: syzbot+cfc0247ac173f597aaaa@syzkaller.appspotmail.com Reported-by: Andy Nguyen Fixes: 9fa492cdc160c ("[NETFILTER]: x_tables: simplify compat API") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 2 ++ net/ipv4/netfilter/ip_tables.c | 2 ++ net/ipv6/netfilter/ip6_tables.c | 2 ++ net/netfilter/x_tables.c | 10 ++-------- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 6c26533480dd..d6d45d820d79 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1193,6 +1193,8 @@ static int translate_compat_table(struct net *net, if (!newinfo) goto out_unlock; + memset(newinfo->entries, 0, size); + newinfo->number = compatr->num_entries; for (i = 0; i < NF_ARP_NUMHOOKS; i++) { newinfo->hook_entry[i] = compatr->hook_entry[i]; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index f15bc21d7301..f77ea0dbe656 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1428,6 +1428,8 @@ translate_compat_table(struct net *net, if (!newinfo) goto out_unlock; + memset(newinfo->entries, 0, size); + newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { newinfo->hook_entry[i] = compatr->hook_entry[i]; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 2e2119bfcf13..eb2b5404806c 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1443,6 +1443,8 @@ translate_compat_table(struct net *net, if (!newinfo) goto out_unlock; + memset(newinfo->entries, 0, size); + newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { newinfo->hook_entry[i] = compatr->hook_entry[i]; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 6bd31a7a27fc..92e9d4ebc5e8 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -733,7 +733,7 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, { const struct xt_match *match = m->u.kernel.match; struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m; - int pad, off = xt_compat_match_offset(match); + int off = xt_compat_match_offset(match); u_int16_t msize = cm->u.user.match_size; char name[sizeof(m->u.user.name)]; @@ -743,9 +743,6 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, match->compat_from_user(m->data, cm->data); else memcpy(m->data, cm->data, msize - sizeof(*cm)); - pad = XT_ALIGN(match->matchsize) - match->matchsize; - if (pad > 0) - memset(m->data + match->matchsize, 0, pad); msize += off; m->u.user.match_size = msize; @@ -1116,7 +1113,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, { const struct xt_target *target = t->u.kernel.target; struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t; - int pad, off = xt_compat_target_offset(target); + int off = xt_compat_target_offset(target); u_int16_t tsize = ct->u.user.target_size; char name[sizeof(t->u.user.name)]; @@ -1126,9 +1123,6 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, target->compat_from_user(t->data, ct->data); else memcpy(t->data, ct->data, tsize - sizeof(*ct)); - pad = XT_ALIGN(target->targetsize) - target->targetsize; - if (pad > 0) - memset(t->data + target->targetsize, 0, pad); tsize += off; t->u.user.target_size = tsize; From 4d8f9065830e526c83199186c5f56a6514f457d2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 10 Apr 2021 21:29:38 +0200 Subject: [PATCH 075/182] netfilter: nftables: clone set element expression template memcpy() breaks when using connlimit in set elements. Use nft_expr_clone() to initialize the connlimit expression list, otherwise connlimit garbage collector crashes when walking on the list head copy. [ 493.064656] Workqueue: events_power_efficient nft_rhash_gc [nf_tables] [ 493.064685] RIP: 0010:find_or_evict+0x5a/0x90 [nf_conncount] [ 493.064694] Code: 2b 43 40 83 f8 01 77 0d 48 c7 c0 f5 ff ff ff 44 39 63 3c 75 df 83 6d 18 01 48 8b 43 08 48 89 de 48 8b 13 48 8b 3d ee 2f 00 00 <48> 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 03 48 83 [ 493.064699] RSP: 0018:ffffc90000417dc0 EFLAGS: 00010297 [ 493.064704] RAX: 0000000000000000 RBX: ffff888134f38410 RCX: 0000000000000000 [ 493.064708] RDX: 0000000000000000 RSI: ffff888134f38410 RDI: ffff888100060cc0 [ 493.064711] RBP: ffff88812ce594a8 R08: ffff888134f38438 R09: 00000000ebb9025c [ 493.064714] R10: ffffffff8219f838 R11: 0000000000000017 R12: 0000000000000001 [ 493.064718] R13: ffffffff82146740 R14: ffff888134f38410 R15: 0000000000000000 [ 493.064721] FS: 0000000000000000(0000) GS:ffff88840e440000(0000) knlGS:0000000000000000 [ 493.064725] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 493.064729] CR2: 0000000000000008 CR3: 00000001330aa002 CR4: 00000000001706e0 [ 493.064733] Call Trace: [ 493.064737] nf_conncount_gc_list+0x8f/0x150 [nf_conncount] [ 493.064746] nft_rhash_gc+0x106/0x390 [nf_tables] Reported-by: Laura Garcia Liebana Fixes: 409444522976 ("netfilter: nf_tables: add elements with stateful expressions") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 46 ++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f57f1a6ba96f..589d2f6978d3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5295,16 +5295,35 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, return -ENOMEM; } -static void nft_set_elem_expr_setup(const struct nft_set_ext *ext, int i, - struct nft_expr *expr_array[]) +static int nft_set_elem_expr_setup(struct nft_ctx *ctx, + const struct nft_set_ext *ext, + struct nft_expr *expr_array[], + u32 num_exprs) { struct nft_set_elem_expr *elem_expr = nft_set_ext_expr(ext); - struct nft_expr *expr = nft_setelem_expr_at(elem_expr, elem_expr->size); + struct nft_expr *expr; + int i, err; - memcpy(expr, expr_array[i], expr_array[i]->ops->size); - elem_expr->size += expr_array[i]->ops->size; - kfree(expr_array[i]); - expr_array[i] = NULL; + for (i = 0; i < num_exprs; i++) { + expr = nft_setelem_expr_at(elem_expr, elem_expr->size); + err = nft_expr_clone(expr, expr_array[i]); + if (err < 0) + goto err_elem_expr_setup; + + elem_expr->size += expr_array[i]->ops->size; + nft_expr_destroy(ctx, expr_array[i]); + expr_array[i] = NULL; + } + + return 0; + +err_elem_expr_setup: + for (; i < num_exprs; i++) { + nft_expr_destroy(ctx, expr_array[i]); + expr_array[i] = NULL; + } + + return -ENOMEM; } static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, @@ -5556,12 +5575,15 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, *nft_set_ext_obj(ext) = obj; obj->use++; } - for (i = 0; i < num_exprs; i++) - nft_set_elem_expr_setup(ext, i, expr_array); + err = nft_set_elem_expr_setup(ctx, ext, expr_array, num_exprs); + if (err < 0) + goto err_elem_expr; trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); - if (trans == NULL) - goto err_trans; + if (trans == NULL) { + err = -ENOMEM; + goto err_elem_expr; + } ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK; err = set->ops->insert(ctx->net, set, &elem, &ext2); @@ -5605,7 +5627,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, set->ops->remove(ctx->net, set, &elem); err_element_clash: kfree(trans); -err_trans: +err_elem_expr: if (obj) obj->use--; From 738fa58ee1328481d1d7889e7c430b3401c571b9 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 12 Apr 2021 17:41:01 +0800 Subject: [PATCH 076/182] arm64: kprobes: Restore local irqflag if kprobes is cancelled If instruction being single stepped caused a page fault, the kprobes is cancelled to let the page fault handler continue as a normal page fault. But the local irqflags are disabled so cpu will restore pstate with DAIF masked. After pagefault is serviced, the kprobes is triggerred again, we overwrite the saved_irqflag by calling kprobes_save_local_irqflag(). NOTE, DAIF is masked in this new saved irqflag. After kprobes is serviced, the cpu pstate is retored with DAIF masked. This patch is inspired by one patch for riscv from Liao Chang. Signed-off-by: Jisheng Zhang Acked-by: Masami Hiramatsu Link: https://lore.kernel.org/r/20210412174101.6bfb0594@xhacker.debian Signed-off-by: Will Deacon --- arch/arm64/kernel/probes/kprobes.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 66aac2881ba8..85645b2b0c7a 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -267,10 +267,12 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) if (!instruction_pointer(regs)) BUG(); - if (kcb->kprobe_status == KPROBE_REENTER) + if (kcb->kprobe_status == KPROBE_REENTER) { restore_previous_kprobe(kcb); - else + } else { + kprobes_restore_local_irqflag(kcb, regs); reset_current_kprobe(); + } break; case KPROBE_HIT_ACTIVE: From 196d941753297d0ca73c563ccd7d00be049ec226 Mon Sep 17 00:00:00 2001 From: Yuanyuan Zhong Date: Mon, 12 Apr 2021 17:17:59 -0600 Subject: [PATCH 077/182] pinctrl: lewisburg: Update number of pins in community When updating pin names for Intel Lewisburg, the numbers of pins were left behind. Update them accordingly. Fixes: e66ff71fd0db ("pinctrl: lewisburg: Update pin list according to v1.1v6") Signed-off-by: Yuanyuan Zhong Signed-off-by: Andy Shevchenko --- drivers/pinctrl/intel/pinctrl-lewisburg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-lewisburg.c b/drivers/pinctrl/intel/pinctrl-lewisburg.c index 7fdf4257df1e..ad4b446d588e 100644 --- a/drivers/pinctrl/intel/pinctrl-lewisburg.c +++ b/drivers/pinctrl/intel/pinctrl-lewisburg.c @@ -299,9 +299,9 @@ static const struct pinctrl_pin_desc lbg_pins[] = { static const struct intel_community lbg_communities[] = { LBG_COMMUNITY(0, 0, 71), LBG_COMMUNITY(1, 72, 132), - LBG_COMMUNITY(3, 133, 144), - LBG_COMMUNITY(4, 145, 180), - LBG_COMMUNITY(5, 181, 246), + LBG_COMMUNITY(3, 133, 143), + LBG_COMMUNITY(4, 144, 178), + LBG_COMMUNITY(5, 179, 246), }; static const struct intel_pinctrl_soc_data lbg_soc_data = { From 6998a8800d73116187aad542391ce3b2dd0f9e30 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 13 Apr 2021 16:01:00 +0200 Subject: [PATCH 078/182] ACPI: x86: Call acpi_boot_table_init() after acpi_table_upgrade() Commit 1a1c130ab757 ("ACPI: tables: x86: Reserve memory occupied by ACPI tables") attempted to address an issue with reserving the memory occupied by ACPI tables, but it broke the initrd-based table override mechanism relied on by multiple users. To restore the initrd-based ACPI table override functionality, move the acpi_boot_table_init() invocation in setup_arch() on x86 after the acpi_table_upgrade() one. Fixes: 1a1c130ab757 ("ACPI: tables: x86: Reserve memory occupied by ACPI tables") Reported-by: Hans de Goede Tested-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/setup.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 5ecd69a48393..ccab6cf91283 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1045,9 +1045,6 @@ void __init setup_arch(char **cmdline_p) cleanup_highmap(); - /* Look for ACPI tables and reserve memory occupied by them. */ - acpi_boot_table_init(); - memblock_set_current_limit(ISA_END_ADDRESS); e820__memblock_setup(); @@ -1132,6 +1129,8 @@ void __init setup_arch(char **cmdline_p) reserve_initrd(); acpi_table_upgrade(); + /* Look for ACPI tables and reserve memory occupied by them. */ + acpi_boot_table_init(); vsmp_init(); From 909290786ea335366e21d7f1ed5812b90f2f0a92 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Mon, 12 Apr 2021 23:41:24 +0200 Subject: [PATCH 079/182] vfio/pci: Add missing range check in vfio_pci_mmap When mmaping an extra device region verify that the region index derived from the mmap offset is valid. Fixes: a15b1883fee1 ("vfio_pci: Allow mapping extra regions") Cc: stable@vger.kernel.org Signed-off-by: Christian A. Ehrhardt Message-Id: <20210412214124.GA241759@lisa.in-ulm.de> Reviewed-by: David Gibson Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 65e7e6b44578..5023e23db3bc 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -1656,6 +1656,8 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT); + if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions) + return -EINVAL; if (vma->vm_end < vma->vm_start) return -EINVAL; if ((vma->vm_flags & VM_SHARED) == 0) @@ -1664,7 +1666,7 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) int regnum = index - VFIO_PCI_NUM_REGIONS; struct vfio_pci_region *region = vdev->region + regnum; - if (region && region->ops && region->ops->mmap && + if (region->ops && region->ops->mmap && (region->flags & VFIO_REGION_INFO_FLAG_MMAP)) return region->ops->mmap(vdev, region, vma); return -EINVAL; From 8db403b9631331ef1d5e302cdf353c48849ca9d5 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 11 Apr 2021 12:21:54 +0200 Subject: [PATCH 080/182] tracing/dynevent: Fix a memory leak in an error handling path We must free 'argv' before returning, as already done in all the other paths of this function. Link: https://lkml.kernel.org/r/21e3594ccd7fc88c5c162c98450409190f304327.1618136448.git.christophe.jaillet@wanadoo.fr Fixes: d262271d0483 ("tracing/dynevent: Delegate parsing to create function") Acked-by: Masami Hiramatsu Signed-off-by: Christophe JAILLET Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_dynevent.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index dc971a68dda4..e57cc0870892 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -63,8 +63,10 @@ int dyn_event_release(const char *raw_command, struct dyn_event_operations *type event = p + 1; *p = '\0'; } - if (event[0] == '\0') - return -EINVAL; + if (event[0] == '\0') { + ret = -EINVAL; + goto out; + } mutex_lock(&event_mutex); for_each_dyn_event_safe(pos, n) { From 31166efb1cee348eb6314e9c0095d84cbeb66b9d Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 8 Mar 2021 12:41:56 -0800 Subject: [PATCH 081/182] ixgbe: Fix NULL pointer dereference in ethtool loopback test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ixgbe driver currently generates a NULL pointer dereference when performing the ethtool loopback test. This is due to the fact that there isn't a q_vector associated with the test ring when it is setup as interrupts are not normally added to the test rings. To address this I have added code that will check for a q_vector before returning a napi_id value. If a q_vector is not present it will return a value of 0. Fixes: b02e5a0ebb17 ("xsk: Propagate napi_id to XDP socket Rx path") Signed-off-by: Alexander Duyck Acked-by: Björn Töpel Tested-by: Dave Switzer Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 03d9aad516d4..45d2c8f37c01 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6536,6 +6536,13 @@ static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter) return err; } +static int ixgbe_rx_napi_id(struct ixgbe_ring *rx_ring) +{ + struct ixgbe_q_vector *q_vector = rx_ring->q_vector; + + return q_vector ? q_vector->napi.napi_id : 0; +} + /** * ixgbe_setup_rx_resources - allocate Rx resources (Descriptors) * @adapter: pointer to ixgbe_adapter @@ -6583,7 +6590,7 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter, /* XDP RX-queue info */ if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, - rx_ring->queue_index, rx_ring->q_vector->napi.napi_id) < 0) + rx_ring->queue_index, ixgbe_rx_napi_id(rx_ring)) < 0) goto err; rx_ring->xdp_prog = adapter->xdp_prog; From debb9df311582c83fe369baa35fa4b92e8a9c58a Mon Sep 17 00:00:00 2001 From: Yongxin Liu Date: Mon, 22 Mar 2021 15:14:48 +0800 Subject: [PATCH 082/182] ixgbe: fix unbalanced device enable/disable in suspend/resume pci_disable_device() called in __ixgbe_shutdown() decreases dev->enable_cnt by 1. pci_enable_device_mem() which increases dev->enable_cnt by 1, was removed from ixgbe_resume() in commit 6f82b2558735 ("ixgbe: use generic power management"). This caused unbalanced increase/decrease. So add pci_enable_device_mem() back. Fix the following call trace. ixgbe 0000:17:00.1: disabling already-disabled device Call Trace: __ixgbe_shutdown+0x10a/0x1e0 [ixgbe] ixgbe_suspend+0x32/0x70 [ixgbe] pci_pm_suspend+0x87/0x160 ? pci_pm_freeze+0xd0/0xd0 dpm_run_callback+0x42/0x170 __device_suspend+0x114/0x460 async_suspend+0x1f/0xa0 async_run_entry_fn+0x3c/0xf0 process_one_work+0x1dd/0x410 worker_thread+0x34/0x3f0 ? cancel_delayed_work+0x90/0x90 kthread+0x14c/0x170 ? kthread_park+0x90/0x90 ret_from_fork+0x1f/0x30 Fixes: 6f82b2558735 ("ixgbe: use generic power management") Signed-off-by: Yongxin Liu Tested-by: Dave Switzer Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 45d2c8f37c01..cffb95f8f632 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6899,6 +6899,11 @@ static int __maybe_unused ixgbe_resume(struct device *dev_d) adapter->hw.hw_addr = adapter->io_addr; + err = pci_enable_device_mem(pdev); + if (err) { + e_dev_err("Cannot enable PCI device from suspend\n"); + return err; + } smp_mb__before_atomic(); clear_bit(__IXGBE_DISABLED, &adapter->state); pci_set_master(pdev); From ef963ae427aa4669905e0a96b3bd9d44dc85db32 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 31 Mar 2021 15:46:28 +0100 Subject: [PATCH 083/182] ice: Fix potential infinite loop when using u8 loop counter A for-loop is using a u8 loop counter that is being compared to a u32 cmp_dcbcfg->numapp to check for the end of the loop. If cmp_dcbcfg->numapp is larger than 255 then the counter j will wrap around to zero and hence an infinite loop occurs. Fix this by making counter j the same type as cmp_dcbcfg->numapp. Addresses-Coverity: ("Infinite loop") Fixes: aeac8ce864d9 ("ice: Recognize 860 as iSCSI port in CEE mode") Signed-off-by: Colin Ian King Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_dcb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c index 211ac6f907ad..28e834a128c0 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb.c @@ -747,8 +747,8 @@ ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg, struct ice_port_info *pi) { u32 status, tlv_status = le32_to_cpu(cee_cfg->tlv_status); - u32 ice_aqc_cee_status_mask, ice_aqc_cee_status_shift; - u8 i, j, err, sync, oper, app_index, ice_app_sel_type; + u32 ice_aqc_cee_status_mask, ice_aqc_cee_status_shift, j; + u8 i, err, sync, oper, app_index, ice_app_sel_type; u16 app_prio = le16_to_cpu(cee_cfg->oper_app_prio); u16 ice_aqc_cee_app_mask, ice_aqc_cee_app_shift; struct ice_dcbx_cfg *cmp_dcbcfg, *dcbcfg; From 610f8c0fc8d46e0933955ce13af3d64484a4630a Mon Sep 17 00:00:00 2001 From: Hristo Venev Date: Mon, 12 Apr 2021 20:41:16 +0300 Subject: [PATCH 084/182] net: sit: Unregister catch-all devices A sit interface created without a local or a remote address is linked into the `sit_net::tunnels_wc` list of its original namespace. When deleting a network namespace, delete the devices that have been moved. The following script triggers a null pointer dereference if devices linked in a deleted `sit_net` remain: for i in `seq 1 30`; do ip netns add ns-test ip netns exec ns-test ip link add dev veth0 type veth peer veth1 ip netns exec ns-test ip link add dev sit$i type sit dev veth0 ip netns exec ns-test ip link set dev sit$i netns $$ ip netns del ns-test done for i in `seq 1 30`; do ip link del dev sit$i done Fixes: 5e6700b3bf98f ("sit: add support of x-netns") Signed-off-by: Hristo Venev Signed-off-by: David S. Miller --- net/ipv6/sit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 63ccd9f2dccc..9fdccf0718b5 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1867,9 +1867,9 @@ static void __net_exit sit_destroy_tunnels(struct net *net, if (dev->rtnl_link_ops == &sit_link_ops) unregister_netdevice_queue(dev, head); - for (prio = 1; prio < 4; prio++) { + for (prio = 0; prio < 4; prio++) { int h; - for (h = 0; h < IP6_SIT_HASH_SIZE; h++) { + for (h = 0; h < (prio ? IP6_SIT_HASH_SIZE : 1); h++) { struct ip_tunnel *t; t = rtnl_dereference(sitn->tunnels[prio][h]); From 941ea91e87a6e879ed82dad4949f6234f2702bec Mon Sep 17 00:00:00 2001 From: Hristo Venev Date: Mon, 12 Apr 2021 20:41:17 +0300 Subject: [PATCH 085/182] net: ip6_tunnel: Unregister catch-all devices Similarly to the sit case, we need to remove the tunnels with no addresses that have been moved to another network namespace. Fixes: 0bd8762824e73 ("ip6tnl: add x-netns support") Signed-off-by: Hristo Venev Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 3fa0eca5a06f..42fe7db6bbb3 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -2244,6 +2244,16 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head t = rtnl_dereference(t->next); } } + + t = rtnl_dereference(ip6n->tnls_wc[0]); + while (t) { + /* If dev is in the same netns, it has already + * been added to the list by the previous loop. + */ + if (!net_eq(dev_net(t->dev), net)) + unregister_netdevice_queue(t->dev, list); + t = rtnl_dereference(t->next); + } } static int __net_init ip6_tnl_init_net(struct net *net) From 97684f0970f6e112926de631fdd98d9693c7e5c1 Mon Sep 17 00:00:00 2001 From: Jonathon Reinhart Date: Tue, 13 Apr 2021 03:08:48 -0400 Subject: [PATCH 086/182] net: Make tcp_allowed_congestion_control readonly in non-init netns Currently, tcp_allowed_congestion_control is global and writable; writing to it in any net namespace will leak into all other net namespaces. tcp_available_congestion_control and tcp_allowed_congestion_control are the only sysctls in ipv4_net_table (the per-netns sysctl table) with a NULL data pointer; their handlers (proc_tcp_available_congestion_control and proc_allowed_congestion_control) have no other way of referencing a struct net. Thus, they operate globally. Because ipv4_net_table does not use designated initializers, there is no easy way to fix up this one "bad" table entry. However, the data pointer updating logic shouldn't be applied to NULL pointers anyway, so we instead force these entries to be read-only. These sysctls used to exist in ipv4_table (init-net only), but they were moved to the per-net ipv4_net_table, presumably without realizing that tcp_allowed_congestion_control was writable and thus introduced a leak. Because the intent of that commit was only to know (i.e. read) "which congestion algorithms are available or allowed", this read-only solution should be sufficient. The logic added in recent commit 31c4d2f160eb: ("net: Ensure net namespace isolation of sysctls") does not and cannot check for NULL data pointers, because other table entries (e.g. /proc/sys/net/netfilter/nf_log/) have .data=NULL but use other methods (.extra2) to access the struct net. Fixes: 9cb8e048e5d9 ("net/ipv4/sysctl: show tcp_{allowed, available}_congestion_control in non-initial netns") Signed-off-by: Jonathon Reinhart Signed-off-by: David S. Miller --- net/ipv4/sysctl_net_ipv4.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index f55095d3ed16..60465f077497 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1378,9 +1378,19 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) if (!table) goto err_alloc; - /* Update the variables to point into the current struct net */ - for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) - table[i].data += (void *)net - (void *)&init_net; + for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) { + if (table[i].data) { + /* Update the variables to point into + * the current struct net + */ + table[i].data += (void *)net - (void *)&init_net; + } else { + /* Entries without data pointer are global; + * Make them read-only in non-init_net ns + */ + table[i].mode &= ~0222; + } + } } net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); From ca09bf7bb109a37a7ff05f230bb3fa3627e6625f Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Tue, 13 Apr 2021 03:33:25 -0500 Subject: [PATCH 087/182] ibmvnic: correctly use dev_consume/free_skb_irq It is more correct to use dev_kfree_skb_irq when packets are dropped, and to use dev_consume_skb_irq when packets are consumed. Fixes: 0d973388185d ("ibmvnic: Introduce xmit_more support using batched subCRQ hcalls") Suggested-by: Thomas Falcon Signed-off-by: Lijun Pan Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 9c6438d3b3a5..110a0d0eaabb 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3204,9 +3204,6 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, next = ibmvnic_next_scrq(adapter, scrq); for (i = 0; i < next->tx_comp.num_comps; i++) { - if (next->tx_comp.rcs[i]) - dev_err(dev, "tx error %x\n", - next->tx_comp.rcs[i]); index = be32_to_cpu(next->tx_comp.correlators[i]); if (index & IBMVNIC_TSO_POOL_MASK) { tx_pool = &adapter->tso_pool[pool]; @@ -3220,7 +3217,13 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, num_entries += txbuff->num_entries; if (txbuff->skb) { total_bytes += txbuff->skb->len; - dev_consume_skb_irq(txbuff->skb); + if (next->tx_comp.rcs[i]) { + dev_err(dev, "tx error %x\n", + next->tx_comp.rcs[i]); + dev_kfree_skb_irq(txbuff->skb); + } else { + dev_consume_skb_irq(txbuff->skb); + } txbuff->skb = NULL; } else { netdev_warn(adapter->netdev, From b166a20b07382b8bc1dcee2a448715c9c2c81b5b Mon Sep 17 00:00:00 2001 From: Or Cohen Date: Tue, 13 Apr 2021 21:10:31 +0300 Subject: [PATCH 088/182] net/sctp: fix race condition in sctp_destroy_sock If sctp_destroy_sock is called without sock_net(sk)->sctp.addr_wq_lock held and sp->do_auto_asconf is true, then an element is removed from the auto_asconf_splist without any proper locking. This can happen in the following functions: 1. In sctp_accept, if sctp_sock_migrate fails. 2. In inet_create or inet6_create, if there is a bpf program attached to BPF_CGROUP_INET_SOCK_CREATE which denies creation of the sctp socket. The bug is fixed by acquiring addr_wq_lock in sctp_destroy_sock instead of sctp_close. This addresses CVE-2021-23133. Reported-by: Or Cohen Reviewed-by: Xin Long Fixes: 610236587600 ("bpf: Add new cgroup attach type to enable sock modifications") Signed-off-by: Or Cohen Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a710917c5ac7..b9b3d899a611 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1520,11 +1520,9 @@ static void sctp_close(struct sock *sk, long timeout) /* Supposedly, no process has access to the socket, but * the net layers still may. - * Also, sctp_destroy_sock() needs to be called with addr_wq_lock - * held and that should be grabbed before socket lock. */ - spin_lock_bh(&net->sctp.addr_wq_lock); - bh_lock_sock_nested(sk); + local_bh_disable(); + bh_lock_sock(sk); /* Hold the sock, since sk_common_release() will put sock_put() * and we have just a little more cleanup. @@ -1533,7 +1531,7 @@ static void sctp_close(struct sock *sk, long timeout) sk_common_release(sk); bh_unlock_sock(sk); - spin_unlock_bh(&net->sctp.addr_wq_lock); + local_bh_enable(); sock_put(sk); @@ -4993,9 +4991,6 @@ static int sctp_init_sock(struct sock *sk) sk_sockets_allocated_inc(sk); sock_prot_inuse_add(net, sk->sk_prot, 1); - /* Nothing can fail after this block, otherwise - * sctp_destroy_sock() will be called without addr_wq_lock held - */ if (net->sctp.default_auto_asconf) { spin_lock(&sock_net(sk)->sctp.addr_wq_lock); list_add_tail(&sp->auto_asconf_list, @@ -5030,7 +5025,9 @@ static void sctp_destroy_sock(struct sock *sk) if (sp->do_auto_asconf) { sp->do_auto_asconf = 0; + spin_lock_bh(&sock_net(sk)->sctp.addr_wq_lock); list_del(&sp->auto_asconf_list); + spin_unlock_bh(&sock_net(sk)->sctp.addr_wq_lock); } sctp_endpoint_free(sp->ep); local_bh_disable(); From 38ec4944b593fd90c5ef42aaaa53e66ae5769d04 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Apr 2021 05:41:35 -0700 Subject: [PATCH 089/182] gro: ensure frag0 meets IP header alignment After commit 0f6925b3e8da ("virtio_net: Do not pull payload in skb->head") Guenter Roeck reported one failure in his tests using sh architecture. After much debugging, we have been able to spot silent unaligned accesses in inet_gro_receive() The issue at hand is that upper networking stacks assume their header is word-aligned. Low level drivers are supposed to reserve NET_IP_ALIGN bytes before the Ethernet header to make that happen. This patch hardens skb_gro_reset_offset() to not allow frag0 fast-path if the fragment is not properly aligned. Some arches like x86, arm64 and powerpc do not care and define NET_IP_ALIGN as 0, this extra check will be a NOP for them. Note that if frag0 is not used, GRO will call pskb_may_pull() as many times as needed to pull network and transport headers. Fixes: 0f6925b3e8da ("virtio_net: Do not pull payload in skb->head") Fixes: 78a478d0efd9 ("gro: Inline skb_gro_header and cache frag0 virtual address") Signed-off-by: Eric Dumazet Reported-by: Guenter Roeck Cc: Xuan Zhuo Cc: "Michael S. Tsirkin" Cc: Jason Wang Acked-by: Michael S. Tsirkin Tested-by: Guenter Roeck Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index af8c1ea040b9..1f79b9aa9a3f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5924,7 +5924,8 @@ static void skb_gro_reset_offset(struct sk_buff *skb) NAPI_GRO_CB(skb)->frag0_len = 0; if (!skb_headlen(skb) && pinfo->nr_frags && - !PageHighMem(skb_frag_page(frag0))) { + !PageHighMem(skb_frag_page(frag0)) && + (!NET_IP_ALIGN || !(skb_frag_off(frag0) & 3))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int, skb_frag_size(frag0), From 04c4f2ee3f68c9a4bf1653d15f1a9a435ae33f7a Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Tue, 13 Apr 2021 15:47:40 +0000 Subject: [PATCH 090/182] KVM: VMX: Don't use vcpu->run->internal.ndata as an array index __vmx_handle_exit() uses vcpu->run->internal.ndata as an index for an array access. Since vcpu->run is (can be) mapped to a user address space with a writer permission, the 'ndata' could be updated by the user process at anytime (the user process can set it to outside the bounds of the array). So, it is not safe that __vmx_handle_exit() uses the 'ndata' that way. Fixes: 1aa561b1a4c0 ("kvm: x86: Add "last CPU" to some KVM_EXIT information") Signed-off-by: Reiji Watanabe Reviewed-by: Jim Mattson Message-Id: <20210413154739.490299-1-reijiw@google.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 32cf8287d4a7..29b40e092d13 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6027,19 +6027,19 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) exit_reason.basic != EXIT_REASON_PML_FULL && exit_reason.basic != EXIT_REASON_APIC_ACCESS && exit_reason.basic != EXIT_REASON_TASK_SWITCH)) { + int ndata = 3; + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV; - vcpu->run->internal.ndata = 3; vcpu->run->internal.data[0] = vectoring_info; vcpu->run->internal.data[1] = exit_reason.full; vcpu->run->internal.data[2] = vcpu->arch.exit_qualification; if (exit_reason.basic == EXIT_REASON_EPT_MISCONFIG) { - vcpu->run->internal.ndata++; - vcpu->run->internal.data[3] = + vcpu->run->internal.data[ndata++] = vmcs_read64(GUEST_PHYSICAL_ADDRESS); } - vcpu->run->internal.data[vcpu->run->internal.ndata++] = - vcpu->arch.last_vmentry_cpu; + vcpu->run->internal.data[ndata++] = vcpu->arch.last_vmentry_cpu; + vcpu->run->internal.ndata = ndata; return 0; } From 2afeec08ab5c86ae21952151f726bfe184f6b23d Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 13 Apr 2021 16:25:12 +0100 Subject: [PATCH 091/182] xen-netback: Check for hotplug-status existence before watching The logic in connect() is currently written with the assumption that xenbus_watch_pathfmt() will return an error for a node that does not exist. This assumption is incorrect: xenstore does allow a watch to be registered for a nonexistent node (and will send notifications should the node be subsequently created). As of commit 1f2565780 ("xen-netback: remove 'hotplug-status' once it has served its purpose"), this leads to a failure when a domU transitions into XenbusStateConnected more than once. On the first domU transition into Connected state, the "hotplug-status" node will be deleted by the hotplug_status_changed() callback in dom0. On the second or subsequent domU transition into Connected state, the hotplug_status_changed() callback will therefore never be invoked, and so the backend will remain stuck in InitWait. This failure prevents scenarios such as reloading the xen-netfront module within a domU, or booting a domU via iPXE. There is unfortunately no way for the domU to work around this dom0 bug. Fix by explicitly checking for existence of the "hotplug-status" node, thereby creating the behaviour that was previously assumed to exist. Signed-off-by: Michael Brown Reviewed-by: Paul Durrant Signed-off-by: David S. Miller --- drivers/net/xen-netback/xenbus.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index a5439c130130..d24b7a7993aa 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -824,11 +824,15 @@ static void connect(struct backend_info *be) xenvif_carrier_on(be->vif); unregister_hotplug_status_watch(be); - err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, NULL, - hotplug_status_changed, - "%s/%s", dev->nodename, "hotplug-status"); - if (!err) + if (xenbus_exists(XBT_NIL, dev->nodename, "hotplug-status")) { + err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, + NULL, hotplug_status_changed, + "%s/%s", dev->nodename, + "hotplug-status"); + if (err) + goto err; be->have_hotplug_status_watch = 1; + } netif_tx_wake_all_queues(be->vif->dev); From 8ca7cab82bda4eb0b8064befeeeaa38106cac637 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 14 Apr 2021 08:28:28 -0700 Subject: [PATCH 092/182] dm verity fec: fix misaligned RS roots IO commit df7b59ba9245 ("dm verity: fix FEC for RS roots unaligned to block size") introduced the possibility for misaligned roots IO relative to the underlying device's logical block size. E.g. Android's default RS roots=2 results in dm_bufio->block_size=1024, which causes the following EIO if the logical block size of the device is 4096, given v->data_dev_block_bits=12: E sd 0 : 0:0:0: [sda] tag#30 request not aligned to the logical block size E blk_update_request: I/O error, dev sda, sector 10368424 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 0 E device-mapper: verity-fec: 254:8: FEC 9244672: parity read failed (block 18056): -5 Fix this by onlu using f->roots for dm_bufio blocksize IFF it is aligned to v->data_dev_block_bits. Fixes: df7b59ba9245 ("dm verity: fix FEC for RS roots unaligned to block size") Cc: stable@vger.kernel.org Signed-off-by: Jaegeuk Kim Signed-off-by: Mike Snitzer --- drivers/md/dm-verity-fec.c | 11 ++++++++--- drivers/md/dm-verity-fec.h | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 66f4c6398f67..cea2b3789736 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -65,7 +65,7 @@ static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index, u8 *res; position = (index + rsb) * v->fec->roots; - block = div64_u64_rem(position, v->fec->roots << SECTOR_SHIFT, &rem); + block = div64_u64_rem(position, v->fec->io_size, &rem); *offset = (unsigned)rem; res = dm_bufio_read(v->fec->bufio, block, buf); @@ -154,7 +154,7 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio, /* read the next block when we run out of parity bytes */ offset += v->fec->roots; - if (offset >= v->fec->roots << SECTOR_SHIFT) { + if (offset >= v->fec->io_size) { dm_bufio_release(buf); par = fec_read_parity(v, rsb, block_offset, &offset, &buf); @@ -742,8 +742,13 @@ int verity_fec_ctr(struct dm_verity *v) return -E2BIG; } + if ((f->roots << SECTOR_SHIFT) & ((1 << v->data_dev_block_bits) - 1)) + f->io_size = 1 << v->data_dev_block_bits; + else + f->io_size = v->fec->roots << SECTOR_SHIFT; + f->bufio = dm_bufio_client_create(f->dev->bdev, - f->roots << SECTOR_SHIFT, + f->io_size, 1, 0, NULL, NULL); if (IS_ERR(f->bufio)) { ti->error = "Cannot initialize FEC bufio client"; diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h index 42fbd3a7fc9f..3c46c8d61883 100644 --- a/drivers/md/dm-verity-fec.h +++ b/drivers/md/dm-verity-fec.h @@ -36,6 +36,7 @@ struct dm_verity_fec { struct dm_dev *dev; /* parity data device */ struct dm_bufio_client *data_bufio; /* for data dev access */ struct dm_bufio_client *bufio; /* for parity data access */ + size_t io_size; /* IO size for roots */ sector_t start; /* parity data start in blocks */ sector_t blocks; /* number of blocks covered */ sector_t rounds; /* number of interleaving rounds */ From c7d95613c7d6e003969722a290397b8271bdad17 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 13 Apr 2021 11:43:00 +0100 Subject: [PATCH 093/182] io_uring: fix early sqd_list removal sqpoll hangs [ 245.463317] INFO: task iou-sqp-1374:1377 blocked for more than 122 seconds. [ 245.463334] task:iou-sqp-1374 state:D flags:0x00004000 [ 245.463345] Call Trace: [ 245.463352] __schedule+0x36b/0x950 [ 245.463376] schedule+0x68/0xe0 [ 245.463385] __io_uring_cancel+0xfb/0x1a0 [ 245.463407] do_exit+0xc0/0xb40 [ 245.463423] io_sq_thread+0x49b/0x710 [ 245.463445] ret_from_fork+0x22/0x30 It happens when sqpoll forgot to run park_task_work and goes to exit, then exiting user may remove ctx from sqd_list, and so corresponding io_sq_thread() -> io_uring_cancel_sqpoll() won't be executed. Hopefully it just stucks in do_exit() in this case. Fixes: dbe1bdbb39db ("io_uring: handle signals for IO threads like a normal thread") Reported-by: Joakim Hassila Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index bd14327c8e7e..dff34975d86b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6754,6 +6754,9 @@ static int io_sq_thread(void *data) current->flags |= PF_NO_SETAFFINITY; mutex_lock(&sqd->lock); + /* a user may had exited before the thread started */ + io_run_task_work_head(&sqd->park_task_work); + while (!test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)) { int ret; bool cap_entries, sqt_spin, needs_sched; @@ -6770,10 +6773,10 @@ static int io_sq_thread(void *data) } cond_resched(); mutex_lock(&sqd->lock); - if (did_sig) - break; io_run_task_work(); io_run_task_work_head(&sqd->park_task_work); + if (did_sig) + break; timeout = jiffies + sqd->sq_thread_idle; continue; } From 16756d3e77ad58cd07e36cbed724aa13ae5a0278 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 13 Apr 2021 20:46:14 -0700 Subject: [PATCH 094/182] ethtool: pause: make sure we init driver stats The intention was for pause statistics to not be reported when driver does not have the relevant callback (only report an empty netlink nest). What happens currently we report all 0s instead. Make sure statistics are initialized to "not set" (which is -1) so the dumping code skips them. Fixes: 9a27a33027f2 ("ethtool: add standard pause stats") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/ethtool/pause.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ethtool/pause.c b/net/ethtool/pause.c index 09998dc5c185..d4ac02718b72 100644 --- a/net/ethtool/pause.c +++ b/net/ethtool/pause.c @@ -38,16 +38,16 @@ static int pause_prepare_data(const struct ethnl_req_info *req_base, if (!dev->ethtool_ops->get_pauseparam) return -EOPNOTSUPP; + ethtool_stats_init((u64 *)&data->pausestat, + sizeof(data->pausestat) / 8); + ret = ethnl_ops_begin(dev); if (ret < 0) return ret; dev->ethtool_ops->get_pauseparam(dev, &data->pauseparam); if (req_base->flags & ETHTOOL_FLAG_STATS && - dev->ethtool_ops->get_pause_stats) { - ethtool_stats_init((u64 *)&data->pausestat, - sizeof(data->pausestat) / 8); + dev->ethtool_ops->get_pause_stats) dev->ethtool_ops->get_pause_stats(dev, &data->pausestat); - } ethnl_ops_complete(dev); return 0; From 453a77894efa4d9b6ef9644d74b9419c47ac427c Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 14 Apr 2021 10:47:10 +0200 Subject: [PATCH 095/182] r8169: don't advertise pause in jumbo mode It has been reported [0] that using pause frames in jumbo mode impacts performance. There's no available chip documentation, but vendor drivers r8168 and r8125 don't advertise pause in jumbo mode. So let's do the same, according to Roman it fixes the issue. [0] https://bugzilla.kernel.org/show_bug.cgi?id=212617 Fixes: 9cf9b84cc701 ("r8169: make use of phy_set_asym_pause") Reported-by: Roman Mamedov Tested-by: Roman Mamedov Signed-off-by: Heiner Kallweit Cc: stable@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 581a92fc3292..1df2c002c9f6 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2350,6 +2350,13 @@ static void rtl_jumbo_config(struct rtl8169_private *tp) if (pci_is_pcie(tp->pci_dev) && tp->supports_gmii) pcie_set_readrq(tp->pci_dev, readrq); + + /* Chip doesn't support pause in jumbo mode */ + linkmode_mod_bit(ETHTOOL_LINK_MODE_Pause_BIT, + tp->phydev->advertising, !jumbo); + linkmode_mod_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + tp->phydev->advertising, !jumbo); + phy_start_aneg(tp->phydev); } DECLARE_RTL_COND(rtl_chipcmd_cond) @@ -4630,8 +4637,6 @@ static int r8169_phy_connect(struct rtl8169_private *tp) if (!tp->supports_gmii) phy_set_max_speed(phydev, SPEED_100); - phy_support_asym_pause(phydev); - phy_attached_info(phydev); return 0; From 0775ebc4cf8554bdcd2c212669a0868ab68df5c0 Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Wed, 14 Apr 2021 02:46:14 -0500 Subject: [PATCH 096/182] ibmvnic: avoid calling napi_disable() twice __ibmvnic_open calls napi_disable without checking whether NAPI polling has already been disabled or not. This could cause napi_disable being called twice, which could generate deadlock. For example, the first napi_disable will spin until NAPI_STATE_SCHED is cleared by napi_complete_done, then set it again. When napi_disable is called the second time, it will loop infinitely because no dev->poll will be running to clear NAPI_STATE_SCHED. To prevent above scenario from happening, call ibmvnic_napi_disable() which checks if napi is disabled or not before calling napi_disable. Fixes: bfc32f297337 ("ibmvnic: Move resource initialization to its own routine") Suggested-by: Thomas Falcon Signed-off-by: Lijun Pan Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 110a0d0eaabb..2d27f8aa0d4b 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1149,8 +1149,7 @@ static int __ibmvnic_open(struct net_device *netdev) rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP); if (rc) { - for (i = 0; i < adapter->req_rx_queues; i++) - napi_disable(&adapter->napi[i]); + ibmvnic_napi_disable(adapter); release_resources(adapter); return rc; } From d3a6abccbd272aea7dc2c6f984bb5a2c11278e44 Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Wed, 14 Apr 2021 02:46:15 -0500 Subject: [PATCH 097/182] ibmvnic: remove duplicate napi_schedule call in do_reset function During adapter reset, do_reset/do_hard_reset calls ibmvnic_open(), which will calls napi_schedule if previous state is VNIC_CLOSED (i.e, the reset case, and "ifconfig down" case). So there is no need for do_reset to call napi_schedule again at the end of the function though napi_schedule will neglect the request if napi is already scheduled. Fixes: ed651a10875f ("ibmvnic: Updated reset handling") Signed-off-by: Lijun Pan Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 2d27f8aa0d4b..f4bd63216672 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1921,7 +1921,7 @@ static int do_reset(struct ibmvnic_adapter *adapter, u64 old_num_rx_queues, old_num_tx_queues; u64 old_num_rx_slots, old_num_tx_slots; struct net_device *netdev = adapter->netdev; - int i, rc; + int rc; netdev_dbg(adapter->netdev, "[S:%d FOP:%d] Reset reason %d, reset_state %d\n", @@ -2110,10 +2110,6 @@ static int do_reset(struct ibmvnic_adapter *adapter, /* refresh device's multicast list */ ibmvnic_set_multi(netdev); - /* kick napi */ - for (i = 0; i < adapter->req_rx_queues; i++) - napi_schedule(&adapter->napi[i]); - if (adapter->reset_reason == VNIC_RESET_FAILOVER || adapter->reset_reason == VNIC_RESET_MOBILITY) __netdev_notify_peers(netdev); From 7c451f3ef676c805a4b77a743a01a5c21a250a73 Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Wed, 14 Apr 2021 02:46:16 -0500 Subject: [PATCH 098/182] ibmvnic: remove duplicate napi_schedule call in open function Remove the unnecessary napi_schedule() call in __ibmvnic_open() since interrupt_rx() calls napi_schedule_prep/__napi_schedule during every receive interrupt. Fixes: ed651a10875f ("ibmvnic: Updated reset handling") Signed-off-by: Lijun Pan Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index f4bd63216672..ffb2a91750c7 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1156,11 +1156,6 @@ static int __ibmvnic_open(struct net_device *netdev) netif_tx_start_all_queues(netdev); - if (prev_state == VNIC_CLOSED) { - for (i = 0; i < adapter->req_rx_queues; i++) - napi_schedule(&adapter->napi[i]); - } - adapter->state = VNIC_OPEN; return rc; } From 292ecd9f5a94dd29d09fe03b5b669cb20b44f19e Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 14 Apr 2021 12:00:27 +0200 Subject: [PATCH 099/182] doc: move seg6_flowlabel to seg6-sysctl.rst Let's have all seg6 sysctl at the same place. Fixes: a6dc6670cd7e ("ipv6: sr: Add documentation for seg_flowlabel sysctl") Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 15 --------------- Documentation/networking/seg6-sysctl.rst | 13 +++++++++++++ 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index c7952ac5bd2f..3feb5e565b1a 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1849,21 +1849,6 @@ ip6frag_low_thresh - INTEGER ip6frag_time - INTEGER Time in seconds to keep an IPv6 fragment in memory. -IPv6 Segment Routing: - -seg6_flowlabel - INTEGER - Controls the behaviour of computing the flowlabel of outer - IPv6 header in case of SR T.encaps - - == ======================================================= - -1 set flowlabel to zero. - 0 copy flowlabel from Inner packet in case of Inner IPv6 - (Set flowlabel to 0 in case IPv4/L2) - 1 Compute the flowlabel using seg6_make_flowlabel() - == ======================================================= - - Default is 0. - ``conf/default/*``: Change the interface-specific default settings. diff --git a/Documentation/networking/seg6-sysctl.rst b/Documentation/networking/seg6-sysctl.rst index ec73e1445030..07c20e470baf 100644 --- a/Documentation/networking/seg6-sysctl.rst +++ b/Documentation/networking/seg6-sysctl.rst @@ -24,3 +24,16 @@ seg6_require_hmac - INTEGER * 1 - Drop SR packets without HMAC, validate SR packets with HMAC Default is 0. + +seg6_flowlabel - INTEGER + Controls the behaviour of computing the flowlabel of outer + IPv6 header in case of SR T.encaps + + == ======================================================= + -1 set flowlabel to zero. + 0 copy flowlabel from Inner packet in case of Inner IPv6 + (Set flowlabel to 0 in case IPv4/L2) + 1 Compute the flowlabel using seg6_make_flowlabel() + == ======================================================= + + Default is 0. From 2e1534f395e73152e2051332034bff61a56a8368 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 14 Apr 2021 12:03:25 +0200 Subject: [PATCH 100/182] vrf: fix a comment about loopback device This is a leftover of the below commit. Fixes: 4f04256c983a ("net: vrf: Drop local rtable and rt6_info") Signed-off-by: Nicolas Dichtel Acked-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 6d9130859c55..503e2fd7ce51 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -471,9 +471,8 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, skb_dst_drop(skb); - /* if dst.dev is loopback or the VRF device again this is locally - * originated traffic destined to a local address. Short circuit - * to Rx path + /* if dst.dev is the VRF device again this is locally originated traffic + * destined to a local address. Short circuit to Rx path. */ if (dst->dev == dev) return vrf_local_xmit(skb, dev, dst); @@ -547,9 +546,8 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, skb_dst_drop(skb); - /* if dst.dev is loopback or the VRF device again this is locally - * originated traffic destined to a local address. Short circuit - * to Rx path + /* if dst.dev is the VRF device again this is locally originated traffic + * destined to a local address. Short circuit to Rx path. */ if (rt->dst.dev == vrf_dev) return vrf_local_xmit(skb, vrf_dev, &rt->dst); From ab4d9913632b1e5ffcf3365783e98718b3c83c7f Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 14 Jan 2021 18:38:16 -0500 Subject: [PATCH 101/182] drm/vmwgfx: Make sure we unpin no longer needed buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were not correctly unpinning no longer needed buffers. In particular vmw_buffer_object, which is internally often pinned on creation wasn't unpinned on destruction and none of the internal MOB buffers were unpinned before being put back. Technically this existed for a long time but commit 57fcd550eb15 ("drm/ttm: Warn on pinning without holding a reference") introduced a WARN_ON which was filling up the kernel logs rather quickly. Quite frankly internal usage of vmw_buffer_object and in general pinning needs to be refactored in vmwgfx but for now this makes it work. Signed-off-by: Zack Rusin Reviewed-by: Martin Krastev Reviewed-by: Roland Scheidegger Fixes: 57fcd550eb15 ("drm/ttm: Warn on pinning without holding a reference") Link: https://patchwork.freedesktop.org/patch/414984/?series=86052&rev=1 Cc: Huang Rui Cc: Christian König Cc: Daniel Vetter Cc: Christian Koenig Cc: dri-devel@lists.freedesktop.org --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 2 ++ drivers/gpu/drm/vmwgfx/vmwgfx_mob.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 5fa5bcd20cc5..336f5086ca26 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1525,6 +1525,8 @@ static inline void vmw_bo_unreference(struct vmw_buffer_object **buf) *buf = NULL; if (tmp_buf != NULL) { + if (tmp_buf->base.pin_count > 0) + ttm_bo_unpin(&tmp_buf->base); ttm_bo_put(&tmp_buf->base); } } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c index a372980fe6a5..a0b53141dded 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c @@ -277,6 +277,7 @@ static int vmw_otable_batch_setup(struct vmw_private *dev_priv, &batch->otables[i]); } + ttm_bo_unpin(batch->otable_bo); ttm_bo_put(batch->otable_bo); batch->otable_bo = NULL; return ret; @@ -342,6 +343,7 @@ static void vmw_otable_batch_takedown(struct vmw_private *dev_priv, vmw_bo_fence_single(bo, NULL); ttm_bo_unreserve(bo); + ttm_bo_unpin(batch->otable_bo); ttm_bo_put(batch->otable_bo); batch->otable_bo = NULL; } @@ -528,6 +530,7 @@ static void vmw_mob_pt_setup(struct vmw_mob *mob, void vmw_mob_destroy(struct vmw_mob *mob) { if (mob->pt_bo) { + ttm_bo_unpin(mob->pt_bo); ttm_bo_put(mob->pt_bo); mob->pt_bo = NULL; } @@ -643,6 +646,7 @@ int vmw_mob_bind(struct vmw_private *dev_priv, out_no_cmd_space: vmw_fifo_resource_dec(dev_priv); if (pt_set_up) { + ttm_bo_unpin(mob->pt_bo); ttm_bo_put(mob->pt_bo); mob->pt_bo = NULL; } From 68ce556bd1643498080af310d4544f46f3c4f3df Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 22 Mar 2021 12:54:57 -0400 Subject: [PATCH 102/182] drm/vmwgfx: Fix the lockdep breakage Thomas has noticed that the lockdep was broken in vmwgfx. It was broken during the pci initialization rework. This fixes the breakage by making sure we initialize the locking code before doing anything else. This was independently spotted and fixed by Tetsuo Handa as well. Reviewed-by: Martin Krastev Reviewed-by: Roland Scheidegger Cc: Tetsuo Handa Cc: dri-devel@lists.freedesktop.org Signed-off-by: Zack Rusin Fixes: 8772c0bb58bbf98a ("drm/vmwgfx: Cleanup pci resource allocation") Link: https://patchwork.freedesktop.org/patch/msgid/20210408172245.673785-1-zackr@vmware.com --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 19 +++++++++---------- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 2 -- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index dd69b51c40e4..6fa24645fbbf 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -712,6 +712,15 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) dev_priv->last_read_seqno = (uint32_t) -100; dev_priv->drm.dev_private = dev_priv; + mutex_init(&dev_priv->cmdbuf_mutex); + mutex_init(&dev_priv->binding_mutex); + ttm_lock_init(&dev_priv->reservation_sem); + spin_lock_init(&dev_priv->resource_lock); + spin_lock_init(&dev_priv->hw_lock); + spin_lock_init(&dev_priv->waiter_lock); + spin_lock_init(&dev_priv->cap_lock); + spin_lock_init(&dev_priv->cursor_lock); + ret = vmw_setup_pci_resources(dev_priv, pci_id); if (ret) return ret; @@ -719,16 +728,6 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) if (ret) goto out_no_pci_or_version; - mutex_init(&dev_priv->cmdbuf_mutex); - mutex_init(&dev_priv->release_mutex); - mutex_init(&dev_priv->binding_mutex); - mutex_init(&dev_priv->global_kms_state_mutex); - ttm_lock_init(&dev_priv->reservation_sem); - spin_lock_init(&dev_priv->resource_lock); - spin_lock_init(&dev_priv->hw_lock); - spin_lock_init(&dev_priv->waiter_lock); - spin_lock_init(&dev_priv->cap_lock); - spin_lock_init(&dev_priv->cursor_lock); for (i = vmw_res_context; i < vmw_res_max; ++i) { idr_init(&dev_priv->res_idr[i]); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 336f5086ca26..8087a9013455 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -529,7 +529,6 @@ struct vmw_private { struct vmw_overlay *overlay_priv; struct drm_property *hotplug_mode_update_property; struct drm_property *implicit_placement_property; - struct mutex global_kms_state_mutex; spinlock_t cursor_lock; struct drm_atomic_state *suspend_state; @@ -592,7 +591,6 @@ struct vmw_private { bool refuse_hibernation; bool suspend_locked; - struct mutex release_mutex; atomic_t num_fifo_resources; /* From 2ef4fb92363c44e8a6f93fd0877b6a7dee6f874d Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 22 Mar 2021 13:04:11 -0400 Subject: [PATCH 103/182] drm/vmwgfx: Make sure bo's are unpinned before putting them back MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During cotable resize we pin the backup buffer to make sure the trylock doesn't fail. We were never unpinning the backup buffer resulting in every subsequent cotable resize trying to release a pinned bo. After we copy the old backup to the new we can release the pin. Mob's are always pinned so we just have to make sure we unpin them before releasing them. Reviewed-by: Thomas Hellström (Intel) Fixes: d1a73c641afd ("drm/vmwgfx: Make sure we unpin no longer needed buffers") Link: https://patchwork.freedesktop.org/patch/msgid/20210413205938.788366-1-zackr@vmware.com Signed-off-by: Zack Rusin --- drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c | 4 ++++ drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 5 +---- drivers/gpu/drm/vmwgfx/vmwgfx_mob.c | 18 ++++++++++++++---- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c index ba658fa9cf6c..183571c387b7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c @@ -481,11 +481,15 @@ static int vmw_cotable_resize(struct vmw_resource *res, size_t new_size) vmw_bo_unreference(&old_buf); res->id = vcotbl->type; + /* Release the pin acquired in vmw_bo_init */ + ttm_bo_unpin(bo); + return 0; out_map_new: ttm_bo_kunmap(&old_map); out_wait: + ttm_bo_unpin(bo); ttm_bo_unreserve(bo); vmw_bo_unreference(&buf); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 8087a9013455..eb76a6b9ebca 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1522,11 +1522,8 @@ static inline void vmw_bo_unreference(struct vmw_buffer_object **buf) struct vmw_buffer_object *tmp_buf = *buf; *buf = NULL; - if (tmp_buf != NULL) { - if (tmp_buf->base.pin_count > 0) - ttm_bo_unpin(&tmp_buf->base); + if (tmp_buf != NULL) ttm_bo_put(&tmp_buf->base); - } } static inline struct vmw_buffer_object * diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c index a0b53141dded..f2d625415458 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c @@ -94,6 +94,16 @@ static void vmw_mob_pt_setup(struct vmw_mob *mob, struct vmw_piter data_iter, unsigned long num_data_pages); + +static inline void vmw_bo_unpin_unlocked(struct ttm_buffer_object *bo) +{ + int ret = ttm_bo_reserve(bo, false, true, NULL); + BUG_ON(ret != 0); + ttm_bo_unpin(bo); + ttm_bo_unreserve(bo); +} + + /* * vmw_setup_otable_base - Issue an object table base setup command to * the device @@ -277,7 +287,7 @@ static int vmw_otable_batch_setup(struct vmw_private *dev_priv, &batch->otables[i]); } - ttm_bo_unpin(batch->otable_bo); + vmw_bo_unpin_unlocked(batch->otable_bo); ttm_bo_put(batch->otable_bo); batch->otable_bo = NULL; return ret; @@ -341,9 +351,9 @@ static void vmw_otable_batch_takedown(struct vmw_private *dev_priv, BUG_ON(ret != 0); vmw_bo_fence_single(bo, NULL); + ttm_bo_unpin(bo); ttm_bo_unreserve(bo); - ttm_bo_unpin(batch->otable_bo); ttm_bo_put(batch->otable_bo); batch->otable_bo = NULL; } @@ -530,7 +540,7 @@ static void vmw_mob_pt_setup(struct vmw_mob *mob, void vmw_mob_destroy(struct vmw_mob *mob) { if (mob->pt_bo) { - ttm_bo_unpin(mob->pt_bo); + vmw_bo_unpin_unlocked(mob->pt_bo); ttm_bo_put(mob->pt_bo); mob->pt_bo = NULL; } @@ -646,7 +656,7 @@ int vmw_mob_bind(struct vmw_private *dev_priv, out_no_cmd_space: vmw_fifo_resource_dec(dev_priv); if (pt_set_up) { - ttm_bo_unpin(mob->pt_bo); + vmw_bo_unpin_unlocked(mob->pt_bo); ttm_bo_put(mob->pt_bo); mob->pt_bo = NULL; } From a714e27ea8bdee2b238748029d31472d0a65b611 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 14 Apr 2021 14:20:29 +0300 Subject: [PATCH 104/182] net: macb: fix the restore of cmp registers Commit a14d273ba159 ("net: macb: restore cmp registers on resume path") introduces the restore of CMP registers on resume path. In case the IP doesn't support type 2 screeners (zero on DCFG8 register) the struct macb::rx_fs_list::list is not initialized and thus the list_for_each_entry(item, &bp->rx_fs_list.list, list) loop introduced in commit a14d273ba159 ("net: macb: restore cmp registers on resume path") will access an uninitialized list leading to crash. Thus, initialize the struct macb::rx_fs_list::list without taking into account if the IP supports type 2 screeners or not. Fixes: a14d273ba159 ("net: macb: restore cmp registers on resume path") Signed-off-by: Claudiu Beznea Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 6e5cf490c01d..0f6a6cb7e98d 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3918,6 +3918,7 @@ static int macb_init(struct platform_device *pdev) reg = gem_readl(bp, DCFG8); bp->max_tuples = min((GEM_BFEXT(SCR2CMP, reg) / 3), GEM_BFEXT(T2SCR, reg)); + INIT_LIST_HEAD(&bp->rx_fs_list.list); if (bp->max_tuples > 0) { /* also needs one ethtype match to check IPv4 */ if (GEM_BFEXT(SCR2ETH, reg) > 0) { @@ -3928,7 +3929,6 @@ static int macb_init(struct platform_device *pdev) /* Filtering is supported in hw but don't enable it in kernel now */ dev->hw_features |= NETIF_F_NTUPLE; /* init Rx flow definitions */ - INIT_LIST_HEAD(&bp->rx_fs_list.list); bp->rx_fs_list.count = 0; spin_lock_init(&bp->rx_fs_lock); } else From 416dcc5ce9d2a810477171c62ffa061a98f87367 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Wed, 14 Apr 2021 19:31:48 +0800 Subject: [PATCH 105/182] cavium/liquidio: Fix duplicate argument Fix the following coccicheck warning: ./drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h:413:6-28: duplicated argument to & or | The CN6XXX_INTR_M1UPB0_ERR here is duplicate. Here should be CN6XXX_INTR_M1UNB0_ERR. Signed-off-by: Wan Jiabing Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h b/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h index b248966837b4..7aad40b2aa73 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h +++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h @@ -412,7 +412,7 @@ | CN6XXX_INTR_M0UNWI_ERR \ | CN6XXX_INTR_M1UPB0_ERR \ | CN6XXX_INTR_M1UPWI_ERR \ - | CN6XXX_INTR_M1UPB0_ERR \ + | CN6XXX_INTR_M1UNB0_ERR \ | CN6XXX_INTR_M1UNWI_ERR \ | CN6XXX_INTR_INSTR_DB_OF_ERR \ | CN6XXX_INTR_SLIST_DB_OF_ERR \ From 00423969d806d7169d16fa6314c570a472ca26c9 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 14 Apr 2021 17:10:07 +0200 Subject: [PATCH 106/182] Revert "net: stmmac: re-init rx buffers when mac resume back" This reverts commit 9c63faaa931e443e7abbbee9de0169f1d4710546, which introduces a suspend/resume regression on Jetson TX2 boards that can be reproduced every time. Given that the issue that this was supposed to fix only occurs very sporadically the safest course of action is to revert before v5.12 and then we can have another go at fixing the more rare issue in the next release (and perhaps backport it if necessary). The root cause of the observed problem seems to be that when the system is suspended, some packets are still in transit. When the descriptors for these buffers are cleared on resume, the descriptors become invalid and cause a fatal bus error. Link: https://lore.kernel.org/r/708edb92-a5df-ecc4-3126-5ab36707e275@nvidia.com/ Reported-by: Jonathan Hunter Signed-off-by: Thierry Reding Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 84 +------------------ 1 file changed, 1 insertion(+), 83 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 208cae344ffa..4749bd0af160 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1379,88 +1379,6 @@ static void stmmac_free_tx_buffer(struct stmmac_priv *priv, u32 queue, int i) } } -/** - * stmmac_reinit_rx_buffers - reinit the RX descriptor buffer. - * @priv: driver private structure - * Description: this function is called to re-allocate a receive buffer, perform - * the DMA mapping and init the descriptor. - */ -static void stmmac_reinit_rx_buffers(struct stmmac_priv *priv) -{ - u32 rx_count = priv->plat->rx_queues_to_use; - u32 queue; - int i; - - for (queue = 0; queue < rx_count; queue++) { - struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; - - for (i = 0; i < priv->dma_rx_size; i++) { - struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; - - if (buf->page) { - page_pool_recycle_direct(rx_q->page_pool, buf->page); - buf->page = NULL; - } - - if (priv->sph && buf->sec_page) { - page_pool_recycle_direct(rx_q->page_pool, buf->sec_page); - buf->sec_page = NULL; - } - } - } - - for (queue = 0; queue < rx_count; queue++) { - struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; - - for (i = 0; i < priv->dma_rx_size; i++) { - struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; - struct dma_desc *p; - - if (priv->extend_desc) - p = &((rx_q->dma_erx + i)->basic); - else - p = rx_q->dma_rx + i; - - if (!buf->page) { - buf->page = page_pool_dev_alloc_pages(rx_q->page_pool); - if (!buf->page) - goto err_reinit_rx_buffers; - - buf->addr = page_pool_get_dma_addr(buf->page); - } - - if (priv->sph && !buf->sec_page) { - buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool); - if (!buf->sec_page) - goto err_reinit_rx_buffers; - - buf->sec_addr = page_pool_get_dma_addr(buf->sec_page); - } - - stmmac_set_desc_addr(priv, p, buf->addr); - if (priv->sph) - stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, true); - else - stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, false); - if (priv->dma_buf_sz == BUF_SIZE_16KiB) - stmmac_init_desc3(priv, p); - } - } - - return; - -err_reinit_rx_buffers: - do { - while (--i >= 0) - stmmac_free_rx_buffer(priv, queue, i); - - if (queue == 0) - break; - - i = priv->dma_rx_size; - } while (queue-- > 0); -} - /** * init_dma_rx_desc_rings - init the RX descriptor rings * @dev: net device structure @@ -5428,7 +5346,7 @@ int stmmac_resume(struct device *dev) mutex_lock(&priv->lock); stmmac_reset_queues_param(priv); - stmmac_reinit_rx_buffers(priv); + stmmac_free_tx_skbufs(priv); stmmac_clear_descriptors(priv); From 41bafb31dcd58d834bdffa5db703f94fd2cec727 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 12 Apr 2021 17:50:08 +0300 Subject: [PATCH 107/182] net/mlx5: Fix setting of devlink traps in switchdev mode Prevent setting of devlink traps on the uplink while in switchdev mode. In this mode, it is the SW switch responsibility to handle both packets with a mismatch in destination MAC or VLAN ID. Therefore, there are no flow steering tables to trap undesirable packets and driver crashes upon setting a trap. Fixes: 241dc159391f ("net/mlx5: Notify on trap action by blocking event") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Reviewed-by: Roi Dayan Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index d7d8a68ef23d..d0f9d3cee97d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -246,6 +246,11 @@ static int mlx5_devlink_trap_action_set(struct devlink *devlink, struct mlx5_devlink_trap *dl_trap; int err = 0; + if (is_mdev_switchdev_mode(dev)) { + NL_SET_ERR_MSG_MOD(extack, "Devlink traps can't be set in switchdev mode"); + return -EOPNOTSUPP; + } + dl_trap = mlx5_find_trap_by_id(dev, trap->id); if (!dl_trap) { mlx5_core_err(dev, "Devlink trap: Set action on invalid trap id 0x%x", trap->id); From 7a320c9db3e73fb6c4f9a331087df9df18767221 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 11 Apr 2021 09:33:12 +0300 Subject: [PATCH 108/182] net/mlx5e: Fix setting of RS FEC mode Change register setting from bit number to bit mask. Fixes: b5ede32d3329 ("net/mlx5e: Add support for FEC modes based on 50G per lane links") Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/port.c | 23 ++++--------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 308fd279669e..89510cac46c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -387,21 +387,6 @@ enum mlx5e_fec_supported_link_mode { *_policy = MLX5_GET(pplm_reg, _buf, fec_override_admin_##link); \ } while (0) -#define MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(buf, policy, write, link) \ - do { \ - unsigned long policy_long; \ - u16 *__policy = &(policy); \ - bool _write = (write); \ - \ - policy_long = *__policy; \ - if (_write && *__policy) \ - *__policy = find_first_bit(&policy_long, \ - sizeof(policy_long) * BITS_PER_BYTE);\ - MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, *__policy, _write, link); \ - if (!_write && *__policy) \ - *__policy = 1 << *__policy; \ - } while (0) - /* get/set FEC admin field for a given speed */ static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write, enum mlx5e_fec_supported_link_mode link_mode) @@ -423,16 +408,16 @@ static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write, MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 100g); break; case MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X: - MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 50g_1x); + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 50g_1x); break; case MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X: - MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 100g_2x); + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 100g_2x); break; case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X: - MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 200g_4x); + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 200g_4x); break; case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X: - MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 400g_8x); + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 400g_8x); break; default: return -EINVAL; From e3e0f9b279705154b951d579dc3d8b7041710e24 Mon Sep 17 00:00:00 2001 From: wenxu Date: Fri, 9 Apr 2021 13:33:48 +0800 Subject: [PATCH 109/182] net/mlx5e: fix ingress_ifindex check in mlx5e_flower_parse_meta In the nft_offload there is the mate flow_dissector with no ingress_ifindex but with ingress_iftype that only be used in the software. So if the mask of ingress_ifindex in meta is 0, this meta check should be bypass. Fixes: 6d65bc64e232 ("net/mlx5e: Add mlx5e_flower_parse_meta support") Signed-off-by: wenxu Acked-by: Pablo Neira Ayuso Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index df2a0af854bb..d675107d9eca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1895,6 +1895,9 @@ static int mlx5e_flower_parse_meta(struct net_device *filter_dev, return 0; flow_rule_match_meta(rule, &match); + if (!match.mask->ingress_ifindex) + return 0; + if (match.mask->ingress_ifindex != 0xFFFFFFFF) { NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask"); return -EOPNOTSUPP; From 22315a2296f4c251fa92aec45fbbae37e9301b6c Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 13 Apr 2021 17:08:04 -0700 Subject: [PATCH 110/182] arm64: alternatives: Move length validation in alternative_{insn, endif} After commit 2decad92f473 ("arm64: mte: Ensure TIF_MTE_ASYNC_FAULT is set atomically"), LLVM's integrated assembler fails to build entry.S: :5:7: error: expected assembly-time absolute expression .org . - (664b-663b) + (662b-661b) ^ :6:7: error: expected assembly-time absolute expression .org . - (662b-661b) + (664b-663b) ^ The root cause is LLVM's assembler has a one-pass design, meaning it cannot figure out these instruction lengths when the .org directive is outside of the subsection that they are in, which was changed by the .arch_extension directive added in the above commit. Apply the same fix from commit 966a0acce2fc ("arm64/alternatives: move length validation inside the subsection") to the alternative_endif macro, shuffling the .org directives so that the length validation happen will always happen in the same subsections. alternative_insn has not shown any issue yet but it appears that it could have the same issue in the future so just preemptively change it. Fixes: f7b93d42945c ("arm64/alternatives: use subsections for replacement sequences") Cc: # 5.8.x Link: https://github.com/ClangBuiltLinux/linux/issues/1347 Signed-off-by: Nathan Chancellor Reviewed-by: Sami Tolvanen Tested-by: Sami Tolvanen Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Link: https://lore.kernel.org/r/20210414000803.662534-1-nathan@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/alternative-macros.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index 5df500dcc627..8a078fc662ac 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -97,9 +97,9 @@ .popsection .subsection 1 663: \insn2 -664: .previous - .org . - (664b-663b) + (662b-661b) +664: .org . - (664b-663b) + (662b-661b) .org . - (662b-661b) + (664b-663b) + .previous .endif .endm @@ -169,11 +169,11 @@ */ .macro alternative_endif 664: + .org . - (664b-663b) + (662b-661b) + .org . - (662b-661b) + (664b-663b) .if .Lasm_alt_mode==0 .previous .endif - .org . - (664b-663b) + (662b-661b) - .org . - (662b-661b) + (664b-663b) .endm /* From 39930213e7779b9c4257499972b8afb8858f1a2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Thu, 8 Apr 2021 04:00:00 +0200 Subject: [PATCH 111/182] i2c: mv64xxx: Fix random system lock caused by runtime PM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I noticed a weird bug with this driver on Marvell CN9130 Customer Reference Board. Sometime after boot, the system locks with the following message: [104.071363] i2c i2c-0: mv64xxx: I2C bus locked, block: 1, time_left: 0 The system does not respond afterwards, only warns about RCU stalls. This first appeared with commit e5c02cf54154 ("i2c: mv64xxx: Add runtime PM support"). With further experimentation I discovered that adding a delay into mv64xxx_i2c_hw_init() fixes this issue. This function is called before every xfer, due to how runtime PM works in this driver. It seems that in order to work correctly, a delay is needed after the bus is reset in this function. Since there already is a known erratum with this controller needing a delay, I assume that this is just another place this needs to be applied. Therefore I apply the delay only if errata_delay is true. Signed-off-by: Marek Behún Acked-by: Gregory CLEMENT Reviewed-by: Samuel Holland Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mv64xxx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index c590d36b5fd1..5c8e94b6cdb5 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -221,6 +221,10 @@ mv64xxx_i2c_hw_init(struct mv64xxx_i2c_data *drv_data) writel(0, drv_data->reg_base + drv_data->reg_offsets.ext_addr); writel(MV64XXX_I2C_REG_CONTROL_TWSIEN | MV64XXX_I2C_REG_CONTROL_STOP, drv_data->reg_base + drv_data->reg_offsets.control); + + if (drv_data->errata_delay) + udelay(5); + drv_data->state = MV64XXX_I2C_STATE_IDLE; } From 4e39a072a6a0fc422ba7da5e4336bdc295d70211 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Wed, 14 Apr 2021 10:34:28 +0800 Subject: [PATCH 112/182] i40e: fix the panic when running bpf in xdpdrv mode Fix this panic by adding more rules to calculate the value of @rss_size_max which could be used in allocating the queues when bpf is loaded, which, however, could cause the failure and then trigger the NULL pointer of vsi->rx_rings. Prio to this fix, the machine doesn't care about how many cpus are online and then allocates 256 queues on the machine with 32 cpus online actually. Once the load of bpf begins, the log will go like this "failed to get tracking for 256 queues for VSI 0 err -12" and this "setup of MAIN VSI failed". Thus, I attach the key information of the crash-log here. BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 RIP: 0010:i40e_xdp+0xdd/0x1b0 [i40e] Call Trace: [2160294.717292] ? i40e_reconfig_rss_queues+0x170/0x170 [i40e] [2160294.717666] dev_xdp_install+0x4f/0x70 [2160294.718036] dev_change_xdp_fd+0x11f/0x230 [2160294.718380] ? dev_disable_lro+0xe0/0xe0 [2160294.718705] do_setlink+0xac7/0xe70 [2160294.719035] ? __nla_parse+0xed/0x120 [2160294.719365] rtnl_newlink+0x73b/0x860 Fixes: 41c445ff0f48 ("i40e: main driver core") Co-developed-by: Shujin Li Signed-off-by: Shujin Li Signed-off-by: Jason Xing Reviewed-by: Jesse Brandeburg Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 30ad7c08d0fb..527023ee4c07 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -12357,6 +12357,7 @@ static int i40e_sw_init(struct i40e_pf *pf) { int err = 0; int size; + u16 pow; /* Set default capability flags */ pf->flags = I40E_FLAG_RX_CSUM_ENABLED | @@ -12375,6 +12376,11 @@ static int i40e_sw_init(struct i40e_pf *pf) pf->rss_table_size = pf->hw.func_caps.rss_table_size; pf->rss_size_max = min_t(int, pf->rss_size_max, pf->hw.func_caps.num_tx_qp); + + /* find the next higher power-of-2 of num cpus */ + pow = roundup_pow_of_two(num_online_cpus()); + pf->rss_size_max = min_t(int, pf->rss_size_max, pow); + if (pf->hw.func_caps.rss) { pf->flags |= I40E_FLAG_RSS_ENABLED; pf->alloc_rss_size = min_t(int, pf->rss_size_max, From 1a73e427b824133940c2dd95ebe26b6dce1cbf10 Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Thu, 15 Apr 2021 13:17:45 +0530 Subject: [PATCH 113/182] ch_ktls: Fix kernel panic Taking page refcount is not ideal and causes kernel panic sometimes. It's better to take tx_ctx lock for the complete skb transmit, to avoid page cleanup if ACK received in middle. Fixes: 5a4b9fe7fece ("cxgb4/chcr: complete record tx handling") Signed-off-by: Vinay Kumar Yadav Signed-off-by: Rohit Maheshwari Signed-off-by: David S. Miller --- .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 24 ++++--------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 1115b8f9ea4e..e39fa0940367 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -2010,12 +2010,11 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) * we will send the complete record again. */ + spin_lock_irqsave(&tx_ctx->base.lock, flags); + do { - int i; cxgb4_reclaim_completed_tx(adap, &q->q, true); - /* lock taken */ - spin_lock_irqsave(&tx_ctx->base.lock, flags); /* fetch the tls record */ record = tls_get_record(&tx_ctx->base, tcp_seq, &tx_info->record_no); @@ -2074,11 +2073,11 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) tls_end_offset, skb_offset, 0); - spin_unlock_irqrestore(&tx_ctx->base.lock, flags); if (ret) { /* free the refcount taken earlier */ if (tls_end_offset < data_len) dev_kfree_skb_any(skb); + spin_unlock_irqrestore(&tx_ctx->base.lock, flags); goto out; } @@ -2088,16 +2087,6 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) continue; } - /* increase page reference count of the record, so that there - * won't be any chance of page free in middle if in case stack - * receives ACK and try to delete the record. - */ - for (i = 0; i < record->num_frags; i++) - __skb_frag_ref(&record->frags[i]); - /* lock cleared */ - spin_unlock_irqrestore(&tx_ctx->base.lock, flags); - - /* if a tls record is finishing in this SKB */ if (tls_end_offset <= data_len) { ret = chcr_end_part_handler(tx_info, skb, record, @@ -2122,13 +2111,9 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) data_len = 0; } - /* clear the frag ref count which increased locally before */ - for (i = 0; i < record->num_frags; i++) { - /* clear the frag ref count */ - __skb_frag_unref(&record->frags[i]); - } /* if any failure, come out from the loop. */ if (ret) { + spin_unlock_irqrestore(&tx_ctx->base.lock, flags); if (th->fin) dev_kfree_skb_any(skb); @@ -2143,6 +2128,7 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) } while (data_len > 0); + spin_unlock_irqrestore(&tx_ctx->base.lock, flags); atomic64_inc(&port_stats->ktls_tx_encrypted_packets); atomic64_add(skb_data_len, &port_stats->ktls_tx_encrypted_bytes); From bc16efd2430652f894ae34b1de5eccc3bf0d2810 Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Thu, 15 Apr 2021 13:17:46 +0530 Subject: [PATCH 114/182] ch_ktls: fix device connection close When sge queue is full and chcr_ktls_xmit_wr_complete() returns failure, skb is not freed if it is not the last tls record in this skb, causes refcount never gets freed and tls_dev_del() never gets called on this connection. Fixes: 5a4b9fe7fece ("cxgb4/chcr: complete record tx handling") Signed-off-by: Vinay Kumar Yadav Signed-off-by: Rohit Maheshwari Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index e39fa0940367..a626560f8365 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -1735,7 +1735,9 @@ static int chcr_end_part_handler(struct chcr_ktls_info *tx_info, struct sge_eth_txq *q, u32 skb_offset, u32 tls_end_offset, bool last_wr) { + bool free_skb_if_tx_fails = false; struct sk_buff *nskb = NULL; + /* check if it is a complete record */ if (tls_end_offset == record->len) { nskb = skb; @@ -1758,6 +1760,8 @@ static int chcr_end_part_handler(struct chcr_ktls_info *tx_info, if (last_wr) dev_kfree_skb_any(skb); + else + free_skb_if_tx_fails = true; last_wr = true; @@ -1769,6 +1773,8 @@ static int chcr_end_part_handler(struct chcr_ktls_info *tx_info, record->num_frags, (last_wr && tcp_push_no_fin), mss)) { + if (free_skb_if_tx_fails) + dev_kfree_skb_any(skb); goto out; } tx_info->prev_seq = record->end_seq; From 21d8c25e3f4b9052a471ced8f47b531956eb9963 Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Thu, 15 Apr 2021 13:17:47 +0530 Subject: [PATCH 115/182] ch_ktls: tcb close causes tls connection failure HW doesn't need marking TCB closed. This TCB state change sometimes causes problem to the new connection which gets the same tid. Fixes: 34aba2c45024 ("cxgb4/chcr : Register to tls add and del callback") Signed-off-by: Vinay Kumar Yadav Signed-off-by: Rohit Maheshwari Signed-off-by: David S. Miller --- .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index a626560f8365..8559eec161f0 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -349,18 +349,6 @@ static int chcr_set_tcb_field(struct chcr_ktls_info *tx_info, u16 word, return cxgb4_ofld_send(tx_info->netdev, skb); } -/* - * chcr_ktls_mark_tcb_close: mark tcb state to CLOSE - * @tx_info - driver specific tls info. - * return: NET_TX_OK/NET_XMIT_DROP. - */ -static int chcr_ktls_mark_tcb_close(struct chcr_ktls_info *tx_info) -{ - return chcr_set_tcb_field(tx_info, TCB_T_STATE_W, - TCB_T_STATE_V(TCB_T_STATE_M), - CHCR_TCB_STATE_CLOSED, 1); -} - /* * chcr_ktls_dev_del: call back for tls_dev_del. * Remove the tid and l2t entry and close the connection. @@ -395,8 +383,6 @@ static void chcr_ktls_dev_del(struct net_device *netdev, /* clear tid */ if (tx_info->tid != -1) { - /* clear tcb state and then release tid */ - chcr_ktls_mark_tcb_close(tx_info); cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan, tx_info->tid, tx_info->ip_family); } @@ -574,7 +560,6 @@ static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk, return 0; free_tid: - chcr_ktls_mark_tcb_close(tx_info); #if IS_ENABLED(CONFIG_IPV6) /* clear clip entry */ if (tx_info->ip_family == AF_INET6) @@ -672,10 +657,6 @@ static int chcr_ktls_cpl_act_open_rpl(struct adapter *adap, if (tx_info->pending_close) { spin_unlock(&tx_info->lock); if (!status) { - /* it's a late success, tcb status is established, - * mark it close. - */ - chcr_ktls_mark_tcb_close(tx_info); cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan, tid, tx_info->ip_family); } From e8a4155567b3c903f49cbf89b8017e9cc22c4fe4 Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Thu, 15 Apr 2021 13:17:48 +0530 Subject: [PATCH 116/182] ch_ktls: do not send snd_una update to TCB in middle snd_una update should not be done when the same skb is being sent out.chcr_short_record_handler() sends it again even though SND_UNA update is already sent for the skb in chcr_ktls_xmit(), which causes mismatch in un-acked TCP seq number, later causes problem in sending out complete record. Fixes: 429765a149f1 ("chcr: handle partial end part of a record") Signed-off-by: Vinay Kumar Yadav Signed-off-by: Rohit Maheshwari Signed-off-by: David S. Miller --- .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 53 ------------------- 1 file changed, 53 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 8559eec161f0..a3f5b80888e5 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -1644,54 +1644,6 @@ static void chcr_ktls_copy_record_in_skb(struct sk_buff *nskb, refcount_add(nskb->truesize, &nskb->sk->sk_wmem_alloc); } -/* - * chcr_ktls_update_snd_una: Reset the SEND_UNA. It will be done to avoid - * sending the same segment again. It will discard the segment which is before - * the current tx max. - * @tx_info - driver specific tls info. - * @q - TX queue. - * return: NET_TX_OK/NET_XMIT_DROP. - */ -static int chcr_ktls_update_snd_una(struct chcr_ktls_info *tx_info, - struct sge_eth_txq *q) -{ - struct fw_ulptx_wr *wr; - unsigned int ndesc; - int credits; - void *pos; - u32 len; - - len = sizeof(*wr) + roundup(CHCR_SET_TCB_FIELD_LEN, 16); - ndesc = DIV_ROUND_UP(len, 64); - - credits = chcr_txq_avail(&q->q) - ndesc; - if (unlikely(credits < 0)) { - chcr_eth_txq_stop(q); - return NETDEV_TX_BUSY; - } - - pos = &q->q.desc[q->q.pidx]; - - wr = pos; - /* ULPTX wr */ - wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR)); - wr->cookie = 0; - /* fill len in wr field */ - wr->flowid_len16 = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(len, 16))); - - pos += sizeof(*wr); - - pos = chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos, - TCB_SND_UNA_RAW_W, - TCB_SND_UNA_RAW_V(TCB_SND_UNA_RAW_M), - TCB_SND_UNA_RAW_V(0), 0); - - chcr_txq_advance(&q->q, ndesc); - cxgb4_ring_tx_db(tx_info->adap, &q->q, ndesc); - - return 0; -} - /* * chcr_end_part_handler: This handler will handle the record which * is complete or if record's end part is received. T6 adapter has a issue that @@ -1892,11 +1844,6 @@ static int chcr_short_record_handler(struct chcr_ktls_info *tx_info, /* reset tcp_seq as per the prior_data_required len */ tcp_seq -= prior_data_len; } - /* reset snd una, so the middle record won't send the already - * sent part. - */ - if (chcr_ktls_update_snd_una(tx_info, q)) - goto out; atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_middle_pkts); } else { atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_start_pkts); From b21bb4cd1102dd9e24a169d09cf4e6f3c8a46bcf Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 15 Apr 2021 16:26:08 -0700 Subject: [PATCH 117/182] cxl/mem: Fix register block offset calculation The "Register Offset Low" register of a "DVSEC Register Locator" contains the 64K aligned offset for the registers along with the BAR indicator and an id. The implementation was treating the "Register Block Offset Low" field a value rather than as a pre-aligned component of the 64-bit offset. So, just mask, don't mask and shift (FIELD_GET). The user visible result of this bug is that the driver fails to bind to the device after none of the required blocks are found. This was missed earlier because the primary development done in the QEMU environment only uses 0 offsets, i.e. 0 shifted is still 0. Fixes: 8adaf747c9f0 ("cxl/mem: Find device capabilities") Reported-by: Vishal Verma Signed-off-by: Ben Widawsky Link: https://lore.kernel.org/r/20210415232610.603273-1-ben.widawsky@intel.com Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index e3003f49b329..1b5078311f7d 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -998,7 +998,7 @@ static struct cxl_mem *cxl_mem_create(struct pci_dev *pdev, u32 reg_lo, return NULL; } - offset = ((u64)reg_hi << 32) | FIELD_GET(CXL_REGLOC_ADDR_MASK, reg_lo); + offset = ((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK); bar = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo); /* Basic sanity check that BAR is big enough */ From 199fc6b8dee7d6d50467a57e0dc7e3e1b7d59966 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 29 Mar 2021 11:13:07 +0800 Subject: [PATCH 118/182] riscv: Fix spelling mistake "SPARSEMEM" to "SPARSMEM" There is a spelling mistake when SPARSEMEM Kconfig copy. Fixes: a5406a7ff56e ("riscv: Correct SPARSEMEM configuration") Cc: stable@vger.kernel.org Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 0d0cf67359cb..4515a10c5d22 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -153,7 +153,7 @@ config ARCH_FLATMEM_ENABLE config ARCH_SPARSEMEM_ENABLE def_bool y depends on MMU - select SPARSEMEM_STATIC if 32BIT && SPARSMEM + select SPARSEMEM_STATIC if 32BIT && SPARSEMEM select SPARSEMEM_VMEMMAP_ENABLE if 64BIT config ARCH_SELECT_MEMORY_MODEL From 2349a3b26e29b8d860466bafda2e02b4b87a9e40 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 30 Mar 2021 02:12:26 +0800 Subject: [PATCH 119/182] riscv: add do_page_fault and do_trap_break into the kprobes blacklist These two functions are used to implement the kprobes feature so they can't be kprobed. Fixes: c22b0bcb1dd0 ("riscv: Add kprobes supported") Cc: stable@vger.kernel.org Signed-off-by: Jisheng Zhang Reviewed-by: Masami Hiramatsu Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/traps.c | 1 + arch/riscv/mm/fault.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 0879b5df11b9..1357abf79570 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -178,6 +178,7 @@ asmlinkage __visible void do_trap_break(struct pt_regs *regs) else die(regs, "Kernel BUG"); } +NOKPROBE_SYMBOL(do_trap_break); #ifdef CONFIG_GENERIC_BUG int is_valid_bugaddr(unsigned long pc) diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 8f17519208c7..c5dbd55cbf7c 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -328,3 +328,4 @@ asmlinkage void do_page_fault(struct pt_regs *regs) } return; } +NOKPROBE_SYMBOL(do_page_fault); From e31be8d343e64e7ab17aef55c1d1b36dc504da67 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 30 Mar 2021 02:14:40 +0800 Subject: [PATCH 120/182] riscv: kprobes/ftrace: Add recursion protection to the ftrace callback Currently, the riscv's kprobes(powerred by ftrace) handler is preemptible. Futher check indicates we miss something similar as the commit c536aa1c5b17 ("kprobes/ftrace: Add recursion protection to the ftrace callback"), so do similar modifications as the commit does. Fixes: 829adda597fe ("riscv: Add KPROBES_ON_FTRACE supported") Cc: stable@vger.kernel.org Signed-off-by: Jisheng Zhang Reviewed-by: Steven Rostedt (VMware) Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/probes/ftrace.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c index 17ca5e923bb0..aab85a82f419 100644 --- a/arch/riscv/kernel/probes/ftrace.c +++ b/arch/riscv/kernel/probes/ftrace.c @@ -9,10 +9,16 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct kprobe *p; struct pt_regs *regs; struct kprobe_ctlblk *kcb; + int bit; + bit = ftrace_test_recursion_trylock(ip, parent_ip); + if (bit < 0) + return; + + preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) - return; + goto out; regs = ftrace_get_regs(fregs); kcb = get_kprobe_ctlblk(); @@ -45,6 +51,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, */ __this_cpu_write(current_kprobe, NULL); } +out: + preempt_enable_notrace(); + ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); From 7ae11635ec90072083503c6b6485cdffe46203b3 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 30 Mar 2021 02:16:24 +0800 Subject: [PATCH 121/182] riscv: keep interrupts disabled for BREAKPOINT exception Current riscv's kprobe handlers are run with both preemption and interrupt enabled, this violates kprobe requirements. Fix this issue by keeping interrupts disabled for BREAKPOINT exception. Fixes: c22b0bcb1dd0 ("riscv: Add kprobes supported") Cc: stable@vger.kernel.org Signed-off-by: Jisheng Zhang Reviewed-by: Masami Hiramatsu [Palmer: add a comment] Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/entry.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 76274a4a1d8e..83095faa680e 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -130,6 +130,9 @@ skip_context_tracking: */ andi t0, s1, SR_PIE beqz t0, 1f + /* kprobes, entered via ebreak, must have interrupts disabled. */ + li t0, EXC_BREAKPOINT + beq s4, t0, 1f #ifdef CONFIG_TRACE_IRQFLAGS call trace_hardirqs_on #endif From 6b5b2a5bcfe9a250da19bac4ef7cabdc81d154ec Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Wed, 14 Apr 2021 16:48:12 +0800 Subject: [PATCH 122/182] drm/i915/gvt: Fix BDW command parser regression On BDW new Windows driver has brought extra registers to handle for LRM/LRR command in WA ctx. Add allowed registers in cmd parser for BDW. Cc: Alex Williamson Cc: Yan Zhao Cc: stable@vger.kernel.org Tested-by: Alex Williamson Reviewed-by: Colin Xu Fixes: 73a37a43d1b0 ("drm/i915/gvt: filter cmds "lrr-src" and "lrr-dst" in cmd_handler") Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20210414084813.3763353-1-zhenyuw@linux.intel.com --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index fef1e857cefc..01c1d1b36acd 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -916,19 +916,26 @@ static int cmd_reg_handler(struct parser_exec_state *s, if (!strncmp(cmd, "srm", 3) || !strncmp(cmd, "lrm", 3)) { - if (offset != i915_mmio_reg_offset(GEN8_L3SQCREG4) && - offset != 0x21f0) { + if (offset == i915_mmio_reg_offset(GEN8_L3SQCREG4) || + offset == 0x21f0 || + (IS_BROADWELL(gvt->gt->i915) && + offset == i915_mmio_reg_offset(INSTPM))) + return 0; + else { gvt_vgpu_err("%s access to register (%x)\n", cmd, offset); return -EPERM; - } else - return 0; + } } if (!strncmp(cmd, "lrr-src", 7) || !strncmp(cmd, "lrr-dst", 7)) { - gvt_vgpu_err("not allowed cmd %s\n", cmd); - return -EPERM; + if (IS_BROADWELL(gvt->gt->i915) && offset == 0x215c) + return 0; + else { + gvt_vgpu_err("not allowed cmd %s reg (%x)\n", cmd, offset); + return -EPERM; + } } if (!strncmp(cmd, "pipe_ctrl", 9)) { From 9601148392520e2e134936e76788fc2a6371e7be Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 23 Mar 2021 08:32:59 +0100 Subject: [PATCH 123/182] bpf: Use correct permission flag for mixed signed bounds arithmetic We forbid adding unknown scalars with mixed signed bounds due to the spectre v1 masking mitigation. Hence this also needs bypass_spec_v1 flag instead of allow_ptr_leaks. Fixes: 2c78ee898d8f ("bpf: Implement CAP_BPF") Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3a738724a380..2ede4b850230 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6085,7 +6085,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, dst, reg_type_str[ptr_reg->type]); return -EACCES; case PTR_TO_MAP_VALUE: - if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) { + if (!env->env->bypass_spec_v1 && !known && (smin_val < 0) != (smax_val < 0)) { verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n", off_reg == dst_reg ? dst : src); return -EACCES; From 6f55b2f2a1178856c19bbce2f71449926e731914 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 22 Mar 2021 15:45:52 +0100 Subject: [PATCH 124/182] bpf: Move off_reg into sanitize_ptr_alu Small refactor to drag off_reg into sanitize_ptr_alu(), so we later on can use off_reg for generalizing some of the checks for all pointer types. Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2ede4b850230..4ee014dadac7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5929,11 +5929,12 @@ static int sanitize_val_alu(struct bpf_verifier_env *env, static int sanitize_ptr_alu(struct bpf_verifier_env *env, struct bpf_insn *insn, const struct bpf_reg_state *ptr_reg, - struct bpf_reg_state *dst_reg, - bool off_is_neg) + const struct bpf_reg_state *off_reg, + struct bpf_reg_state *dst_reg) { struct bpf_verifier_state *vstate = env->cur_state; struct bpf_insn_aux_data *aux = cur_aux(env); + bool off_is_neg = off_reg->smin_value < 0; bool ptr_is_dst_reg = ptr_reg == dst_reg; u8 opcode = BPF_OP(insn->code); u32 alu_state, alu_limit; @@ -6110,7 +6111,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, switch (opcode) { case BPF_ADD: - ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); + ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg); if (ret < 0) { verbose(env, "R%d tried to add from different maps, paths, or prohibited types\n", dst); return ret; @@ -6165,7 +6166,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, } break; case BPF_SUB: - ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); + ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg); if (ret < 0) { verbose(env, "R%d tried to sub from different maps, paths, or prohibited types\n", dst); return ret; From 24c109bb1537c12c02aeed2d51a347b4d6a9b76e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 23 Mar 2021 08:51:02 +0100 Subject: [PATCH 125/182] bpf: Ensure off_reg has no mixed signed bounds for all types The mixed signed bounds check really belongs into retrieve_ptr_limit() instead of outside of it in adjust_ptr_min_max_vals(). The reason is that this check is not tied to PTR_TO_MAP_VALUE only, but to all pointer types that we handle in retrieve_ptr_limit() and given errors from the latter propagate back to adjust_ptr_min_max_vals() and lead to rejection of the program, it's a better place to reside to avoid anything slipping through for future types. The reason why we must reject such off_reg is that we otherwise would not be able to derive a mask, see details in 9d7eceede769 ("bpf: restrict unknown scalars of mixed signed bounds for unprivileged"). Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4ee014dadac7..a21d7f1a0ba8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5857,12 +5857,18 @@ static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env) } static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, - u32 *ptr_limit, u8 opcode, bool off_is_neg) + const struct bpf_reg_state *off_reg, + u32 *ptr_limit, u8 opcode) { + bool off_is_neg = off_reg->smin_value < 0; bool mask_to_left = (opcode == BPF_ADD && off_is_neg) || (opcode == BPF_SUB && !off_is_neg); u32 off, max; + if (!tnum_is_const(off_reg->var_off) && + (off_reg->smin_value < 0) != (off_reg->smax_value < 0)) + return -EACCES; + switch (ptr_reg->type) { case PTR_TO_STACK: /* Offset 0 is out-of-bounds, but acceptable start for the @@ -5956,7 +5962,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, alu_state |= ptr_is_dst_reg ? BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; - err = retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg); + err = retrieve_ptr_limit(ptr_reg, off_reg, &alu_limit, opcode); if (err < 0) return err; @@ -6036,8 +6042,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value, umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; - u32 dst = insn->dst_reg, src = insn->src_reg; u8 opcode = BPF_OP(insn->code); + u32 dst = insn->dst_reg; int ret; dst_reg = ®s[dst]; @@ -6085,13 +6091,6 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, verbose(env, "R%d pointer arithmetic on %s prohibited\n", dst, reg_type_str[ptr_reg->type]); return -EACCES; - case PTR_TO_MAP_VALUE: - if (!env->env->bypass_spec_v1 && !known && (smin_val < 0) != (smax_val < 0)) { - verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n", - off_reg == dst_reg ? dst : src); - return -EACCES; - } - fallthrough; default: break; } From b658bbb844e28f1862867f37e8ca11a8e2aa94a3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 23 Mar 2021 09:04:10 +0100 Subject: [PATCH 126/182] bpf: Rework ptr_limit into alu_limit and add common error path Small refactor with no semantic changes in order to consolidate the max ptr_limit boundary check. Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a21d7f1a0ba8..b8e0171d9591 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5858,12 +5858,12 @@ static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env) static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, const struct bpf_reg_state *off_reg, - u32 *ptr_limit, u8 opcode) + u32 *alu_limit, u8 opcode) { bool off_is_neg = off_reg->smin_value < 0; bool mask_to_left = (opcode == BPF_ADD && off_is_neg) || (opcode == BPF_SUB && !off_is_neg); - u32 off, max; + u32 off, max = 0, ptr_limit = 0; if (!tnum_is_const(off_reg->var_off) && (off_reg->smin_value < 0) != (off_reg->smax_value < 0)) @@ -5880,22 +5880,27 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, */ off = ptr_reg->off + ptr_reg->var_off.value; if (mask_to_left) - *ptr_limit = MAX_BPF_STACK + off; + ptr_limit = MAX_BPF_STACK + off; else - *ptr_limit = -off - 1; - return *ptr_limit >= max ? -ERANGE : 0; + ptr_limit = -off - 1; + break; case PTR_TO_MAP_VALUE: max = ptr_reg->map_ptr->value_size; if (mask_to_left) { - *ptr_limit = ptr_reg->umax_value + ptr_reg->off; + ptr_limit = ptr_reg->umax_value + ptr_reg->off; } else { off = ptr_reg->smin_value + ptr_reg->off; - *ptr_limit = ptr_reg->map_ptr->value_size - off - 1; + ptr_limit = ptr_reg->map_ptr->value_size - off - 1; } - return *ptr_limit >= max ? -ERANGE : 0; + break; default: return -EINVAL; } + + if (ptr_limit >= max) + return -ERANGE; + *alu_limit = ptr_limit; + return 0; } static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env, From a6aaece00a57fa6f22575364b3903dfbccf5345d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 23 Mar 2021 09:30:01 +0100 Subject: [PATCH 127/182] bpf: Improve verifier error messages for users Consolidate all error handling and provide more user-friendly error messages from sanitize_ptr_alu() and sanitize_val_alu(). Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 86 +++++++++++++++++++++++++++++++------------ 1 file changed, 63 insertions(+), 23 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b8e0171d9591..f378d4ae405f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5856,6 +5856,14 @@ static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env) return &env->insn_aux_data[env->insn_idx]; } +enum { + REASON_BOUNDS = -1, + REASON_TYPE = -2, + REASON_PATHS = -3, + REASON_LIMIT = -4, + REASON_STACK = -5, +}; + static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, const struct bpf_reg_state *off_reg, u32 *alu_limit, u8 opcode) @@ -5867,7 +5875,7 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, if (!tnum_is_const(off_reg->var_off) && (off_reg->smin_value < 0) != (off_reg->smax_value < 0)) - return -EACCES; + return REASON_BOUNDS; switch (ptr_reg->type) { case PTR_TO_STACK: @@ -5894,11 +5902,11 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, } break; default: - return -EINVAL; + return REASON_TYPE; } if (ptr_limit >= max) - return -ERANGE; + return REASON_LIMIT; *alu_limit = ptr_limit; return 0; } @@ -5918,7 +5926,7 @@ static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux, if (aux->alu_state && (aux->alu_state != alu_state || aux->alu_limit != alu_limit)) - return -EACCES; + return REASON_PATHS; /* Corresponding fixup done in fixup_bpf_calls(). */ aux->alu_state = alu_state; @@ -5991,7 +5999,46 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true); if (!ptr_is_dst_reg && ret) *dst_reg = tmp; - return !ret ? -EFAULT : 0; + return !ret ? REASON_STACK : 0; +} + +static int sanitize_err(struct bpf_verifier_env *env, + const struct bpf_insn *insn, int reason, + const struct bpf_reg_state *off_reg, + const struct bpf_reg_state *dst_reg) +{ + static const char *err = "pointer arithmetic with it prohibited for !root"; + const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub"; + u32 dst = insn->dst_reg, src = insn->src_reg; + + switch (reason) { + case REASON_BOUNDS: + verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n", + off_reg == dst_reg ? dst : src, err); + break; + case REASON_TYPE: + verbose(env, "R%d has pointer with unsupported alu operation, %s\n", + off_reg == dst_reg ? src : dst, err); + break; + case REASON_PATHS: + verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n", + dst, op, err); + break; + case REASON_LIMIT: + verbose(env, "R%d tried to %s beyond pointer bounds, %s\n", + dst, op, err); + break; + case REASON_STACK: + verbose(env, "R%d could not be pushed for speculative verification, %s\n", + dst, err); + break; + default: + verbose(env, "verifier internal error: unknown reason (%d)\n", + reason); + break; + } + + return -EACCES; } /* check that stack access falls within stack limits and that 'reg' doesn't @@ -6116,10 +6163,9 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, switch (opcode) { case BPF_ADD: ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg); - if (ret < 0) { - verbose(env, "R%d tried to add from different maps, paths, or prohibited types\n", dst); - return ret; - } + if (ret < 0) + return sanitize_err(env, insn, ret, off_reg, dst_reg); + /* We can take a fixed offset as long as it doesn't overflow * the s32 'off' field */ @@ -6171,10 +6217,9 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, break; case BPF_SUB: ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg); - if (ret < 0) { - verbose(env, "R%d tried to sub from different maps, paths, or prohibited types\n", dst); - return ret; - } + if (ret < 0) + return sanitize_err(env, insn, ret, off_reg, dst_reg); + if (dst_reg == off_reg) { /* scalar -= pointer. Creates an unknown scalar */ verbose(env, "R%d tried to subtract pointer from scalar\n", @@ -6863,9 +6908,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, s32 s32_min_val, s32_max_val; u32 u32_min_val, u32_max_val; u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; - u32 dst = insn->dst_reg; - int ret; bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64); + int ret; smin_val = src_reg.smin_value; smax_val = src_reg.smax_value; @@ -6924,20 +6968,16 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, switch (opcode) { case BPF_ADD: ret = sanitize_val_alu(env, insn); - if (ret < 0) { - verbose(env, "R%d tried to add from different pointers or scalars\n", dst); - return ret; - } + if (ret < 0) + return sanitize_err(env, insn, ret, NULL, NULL); scalar32_min_max_add(dst_reg, &src_reg); scalar_min_max_add(dst_reg, &src_reg); dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off); break; case BPF_SUB: ret = sanitize_val_alu(env, insn); - if (ret < 0) { - verbose(env, "R%d tried to sub from different pointers or scalars\n", dst); - return ret; - } + if (ret < 0) + return sanitize_err(env, insn, ret, NULL, NULL); scalar32_min_max_sub(dst_reg, &src_reg); scalar_min_max_sub(dst_reg, &src_reg); dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off); From 073815b756c51ba9d8384d924c5d1c03ca3d1ae4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 23 Mar 2021 15:05:48 +0100 Subject: [PATCH 128/182] bpf: Refactor and streamline bounds check into helper Move the bounds check in adjust_ptr_min_max_vals() into a small helper named sanitize_check_bounds() in order to simplify the former a bit. Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 49 +++++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f378d4ae405f..db77e2c670b9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6075,6 +6075,37 @@ static int check_stack_access_for_ptr_arithmetic( return 0; } +static int sanitize_check_bounds(struct bpf_verifier_env *env, + const struct bpf_insn *insn, + const struct bpf_reg_state *dst_reg) +{ + u32 dst = insn->dst_reg; + + /* For unprivileged we require that resulting offset must be in bounds + * in order to be able to sanitize access later on. + */ + if (env->bypass_spec_v1) + return 0; + + switch (dst_reg->type) { + case PTR_TO_STACK: + if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg, + dst_reg->off + dst_reg->var_off.value)) + return -EACCES; + break; + case PTR_TO_MAP_VALUE: + if (check_map_access(env, dst, dst_reg->off, 1, false)) { + verbose(env, "R%d pointer arithmetic of map value goes out of range, " + "prohibited for !root\n", dst); + return -EACCES; + } + break; + default: + break; + } + + return 0; +} /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. * Caller should also handle BPF_MOV case separately. @@ -6300,22 +6331,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, __reg_deduce_bounds(dst_reg); __reg_bound_offset(dst_reg); - /* For unprivileged we require that resulting offset must be in bounds - * in order to be able to sanitize access later on. - */ - if (!env->bypass_spec_v1) { - if (dst_reg->type == PTR_TO_MAP_VALUE && - check_map_access(env, dst, dst_reg->off, 1, false)) { - verbose(env, "R%d pointer arithmetic of map value goes out of range, " - "prohibited for !root\n", dst); - return -EACCES; - } else if (dst_reg->type == PTR_TO_STACK && - check_stack_access_for_ptr_arithmetic( - env, dst, dst_reg, dst_reg->off + - dst_reg->var_off.value)) { - return -EACCES; - } - } + if (sanitize_check_bounds(env, insn, dst_reg) < 0) + return -EACCES; return 0; } From f528819334881fd622fdadeddb3f7edaed8b7c9b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 24 Mar 2021 11:25:39 +0100 Subject: [PATCH 129/182] bpf: Move sanitize_val_alu out of op switch Add a small sanitize_needed() helper function and move sanitize_val_alu() out of the main opcode switch. In upcoming work, we'll move sanitize_ptr_alu() as well out of its opcode switch so this helps to streamline both. Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index db77e2c670b9..e41b6326e3e6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5945,6 +5945,11 @@ static int sanitize_val_alu(struct bpf_verifier_env *env, return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0); } +static bool sanitize_needed(u8 opcode) +{ + return opcode == BPF_ADD || opcode == BPF_SUB; +} + static int sanitize_ptr_alu(struct bpf_verifier_env *env, struct bpf_insn *insn, const struct bpf_reg_state *ptr_reg, @@ -6968,6 +6973,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, return 0; } + if (sanitize_needed(opcode)) { + ret = sanitize_val_alu(env, insn); + if (ret < 0) + return sanitize_err(env, insn, ret, NULL, NULL); + } + /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops. * There are two classes of instructions: The first class we track both * alu32 and alu64 sign/unsigned bounds independently this provides the @@ -6984,17 +6995,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, */ switch (opcode) { case BPF_ADD: - ret = sanitize_val_alu(env, insn); - if (ret < 0) - return sanitize_err(env, insn, ret, NULL, NULL); scalar32_min_max_add(dst_reg, &src_reg); scalar_min_max_add(dst_reg, &src_reg); dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off); break; case BPF_SUB: - ret = sanitize_val_alu(env, insn); - if (ret < 0) - return sanitize_err(env, insn, ret, NULL, NULL); scalar32_min_max_sub(dst_reg, &src_reg); scalar_min_max_sub(dst_reg, &src_reg); dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off); From 7fedb63a8307dda0ec3b8969a3b233a1dd7ea8e0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 24 Mar 2021 10:38:26 +0100 Subject: [PATCH 130/182] bpf: Tighten speculative pointer arithmetic mask This work tightens the offset mask we use for unprivileged pointer arithmetic in order to mitigate a corner case reported by Piotr and Benedict where in the speculative domain it is possible to advance, for example, the map value pointer by up to value_size-1 out-of-bounds in order to leak kernel memory via side-channel to user space. Before this change, the computed ptr_limit for retrieve_ptr_limit() helper represents largest valid distance when moving pointer to the right or left which is then fed as aux->alu_limit to generate masking instructions against the offset register. After the change, the derived aux->alu_limit represents the largest potential value of the offset register which we mask against which is just a narrower subset of the former limit. For minimal complexity, we call sanitize_ptr_alu() from 2 observation points in adjust_ptr_min_max_vals(), that is, before and after the simulated alu operation. In the first step, we retieve the alu_state and alu_limit before the operation as well as we branch-off a verifier path and push it to the verification stack as we did before which checks the dst_reg under truncation, in other words, when the speculative domain would attempt to move the pointer out-of-bounds. In the second step, we retrieve the new alu_limit and calculate the absolute distance between both. Moreover, we commit the alu_state and final alu_limit via update_alu_sanitation_state() to the env's instruction aux data, and bail out from there if there is a mismatch due to coming from different verification paths with different states. Reported-by: Piotr Krysiuk Reported-by: Benedict Schlueter Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov Tested-by: Benedict Schlueter --- kernel/bpf/verifier.c | 73 ++++++++++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e41b6326e3e6..0399ac092b36 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5871,7 +5871,7 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, bool off_is_neg = off_reg->smin_value < 0; bool mask_to_left = (opcode == BPF_ADD && off_is_neg) || (opcode == BPF_SUB && !off_is_neg); - u32 off, max = 0, ptr_limit = 0; + u32 max = 0, ptr_limit = 0; if (!tnum_is_const(off_reg->var_off) && (off_reg->smin_value < 0) != (off_reg->smax_value < 0)) @@ -5880,26 +5880,18 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, switch (ptr_reg->type) { case PTR_TO_STACK: /* Offset 0 is out-of-bounds, but acceptable start for the - * left direction, see BPF_REG_FP. + * left direction, see BPF_REG_FP. Also, unknown scalar + * offset where we would need to deal with min/max bounds is + * currently prohibited for unprivileged. */ max = MAX_BPF_STACK + mask_to_left; - /* Indirect variable offset stack access is prohibited in - * unprivileged mode so it's not handled here. - */ - off = ptr_reg->off + ptr_reg->var_off.value; - if (mask_to_left) - ptr_limit = MAX_BPF_STACK + off; - else - ptr_limit = -off - 1; + ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off); break; case PTR_TO_MAP_VALUE: max = ptr_reg->map_ptr->value_size; - if (mask_to_left) { - ptr_limit = ptr_reg->umax_value + ptr_reg->off; - } else { - off = ptr_reg->smin_value + ptr_reg->off; - ptr_limit = ptr_reg->map_ptr->value_size - off - 1; - } + ptr_limit = (mask_to_left ? + ptr_reg->smin_value : + ptr_reg->umax_value) + ptr_reg->off; break; default: return REASON_TYPE; @@ -5954,10 +5946,12 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, struct bpf_insn *insn, const struct bpf_reg_state *ptr_reg, const struct bpf_reg_state *off_reg, - struct bpf_reg_state *dst_reg) + struct bpf_reg_state *dst_reg, + struct bpf_insn_aux_data *tmp_aux, + const bool commit_window) { + struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : tmp_aux; struct bpf_verifier_state *vstate = env->cur_state; - struct bpf_insn_aux_data *aux = cur_aux(env); bool off_is_neg = off_reg->smin_value < 0; bool ptr_is_dst_reg = ptr_reg == dst_reg; u8 opcode = BPF_OP(insn->code); @@ -5976,18 +5970,33 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, if (vstate->speculative) goto do_sim; - alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0; - alu_state |= ptr_is_dst_reg ? - BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; - err = retrieve_ptr_limit(ptr_reg, off_reg, &alu_limit, opcode); if (err < 0) return err; + if (commit_window) { + /* In commit phase we narrow the masking window based on + * the observed pointer move after the simulated operation. + */ + alu_state = tmp_aux->alu_state; + alu_limit = abs(tmp_aux->alu_limit - alu_limit); + } else { + alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0; + alu_state |= ptr_is_dst_reg ? + BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; + } + err = update_alu_sanitation_state(aux, alu_state, alu_limit); if (err < 0) return err; do_sim: + /* If we're in commit phase, we're done here given we already + * pushed the truncated dst_reg into the speculative verification + * stack. + */ + if (commit_window) + return 0; + /* Simulate and find potential out-of-bounds access under * speculative execution from truncation as a result of * masking when off was not within expected range. If off @@ -6130,6 +6139,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value, umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; + struct bpf_insn_aux_data tmp_aux = {}; u8 opcode = BPF_OP(insn->code); u32 dst = insn->dst_reg; int ret; @@ -6196,12 +6206,15 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, /* pointer types do not carry 32-bit bounds at the moment. */ __mark_reg32_unbounded(dst_reg); - switch (opcode) { - case BPF_ADD: - ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg); + if (sanitize_needed(opcode)) { + ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg, + &tmp_aux, false); if (ret < 0) return sanitize_err(env, insn, ret, off_reg, dst_reg); + } + switch (opcode) { + case BPF_ADD: /* We can take a fixed offset as long as it doesn't overflow * the s32 'off' field */ @@ -6252,10 +6265,6 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, } break; case BPF_SUB: - ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg); - if (ret < 0) - return sanitize_err(env, insn, ret, off_reg, dst_reg); - if (dst_reg == off_reg) { /* scalar -= pointer. Creates an unknown scalar */ verbose(env, "R%d tried to subtract pointer from scalar\n", @@ -6338,6 +6347,12 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, if (sanitize_check_bounds(env, insn, dst_reg) < 0) return -EACCES; + if (sanitize_needed(opcode)) { + ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg, + &tmp_aux, true); + if (ret < 0) + return sanitize_err(env, insn, ret, off_reg, dst_reg); + } return 0; } From d7a5091351756d0ae8e63134313c455624e36a13 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 24 Mar 2021 14:52:31 +0100 Subject: [PATCH 131/182] bpf: Update selftests to reflect new error states Update various selftest error messages: * The 'Rx tried to sub from different maps, paths, or prohibited types' is reworked into more specific/differentiated error messages for better guidance. * The change into 'value -4294967168 makes map_value pointer be out of bounds' is due to moving the mixed bounds check into the speculation handling and thus occuring slightly later than above mentioned sanity check. * The change into 'math between map_value pointer and register with unbounded min value' is similarly due to register sanity check coming before the mixed bounds check. * The case of 'map access: known scalar += value_ptr from different maps' now loads fine given masks are the same from the different paths (despite max map value size being different). Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- tools/testing/selftests/bpf/verifier/bounds.c | 5 ----- .../selftests/bpf/verifier/bounds_deduction.c | 21 ++++++++++--------- .../bpf/verifier/bounds_mix_sign_unsign.c | 13 ------------ .../testing/selftests/bpf/verifier/map_ptr.c | 4 ++-- tools/testing/selftests/bpf/verifier/unpriv.c | 2 +- .../selftests/bpf/verifier/value_ptr_arith.c | 6 ++---- 6 files changed, 16 insertions(+), 35 deletions(-) diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c index 57ed67b86074..8a1caf46ffbc 100644 --- a/tools/testing/selftests/bpf/verifier/bounds.c +++ b/tools/testing/selftests/bpf/verifier/bounds.c @@ -261,8 +261,6 @@ }, .fixup_map_hash_8b = { 3 }, /* not actually fully unbounded, but the bound is very high */ - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root", - .result_unpriv = REJECT, .errstr = "value -4294967168 makes map_value pointer be out of bounds", .result = REJECT, }, @@ -298,9 +296,6 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 3 }, - /* not actually fully unbounded, but the bound is very high */ - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root", - .result_unpriv = REJECT, .errstr = "value -4294967168 makes map_value pointer be out of bounds", .result = REJECT, }, diff --git a/tools/testing/selftests/bpf/verifier/bounds_deduction.c b/tools/testing/selftests/bpf/verifier/bounds_deduction.c index c162498a64fc..91869aea6d64 100644 --- a/tools/testing/selftests/bpf/verifier/bounds_deduction.c +++ b/tools/testing/selftests/bpf/verifier/bounds_deduction.c @@ -6,7 +6,7 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types", + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .errstr = "R0 tried to subtract pointer from scalar", .result = REJECT, }, @@ -21,7 +21,7 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R1 tried to sub from different maps, paths, or prohibited types", + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .result_unpriv = REJECT, .result = ACCEPT, .retval = 1, @@ -34,22 +34,23 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types", + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .errstr = "R0 tried to subtract pointer from scalar", .result = REJECT, }, { "check deducing bounds from const, 4", .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), - BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), + BPF_ALU64_REG(BPF_SUB, BPF_REG_6, BPF_REG_0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R1 tried to sub from different maps, paths, or prohibited types", + .errstr_unpriv = "R6 has pointer with unsupported alu operation", .result_unpriv = REJECT, .result = ACCEPT, }, @@ -61,7 +62,7 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types", + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .errstr = "R0 tried to subtract pointer from scalar", .result = REJECT, }, @@ -74,7 +75,7 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types", + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .errstr = "R0 tried to subtract pointer from scalar", .result = REJECT, }, @@ -88,7 +89,7 @@ offsetof(struct __sk_buff, mark)), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R1 tried to sub from different maps, paths, or prohibited types", + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .errstr = "dereference of modified ctx ptr", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -103,7 +104,7 @@ offsetof(struct __sk_buff, mark)), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types", + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .errstr = "dereference of modified ctx ptr", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -116,7 +117,7 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types", + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .errstr = "R0 tried to subtract pointer from scalar", .result = REJECT, }, diff --git a/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c b/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c index 9baca7a75c42..c2aa6f26738b 100644 --- a/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c +++ b/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c @@ -19,7 +19,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -43,7 +42,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -69,7 +67,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R8 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -94,7 +91,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R8 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -141,7 +137,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -210,7 +205,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -260,7 +254,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -287,7 +280,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -313,7 +305,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -342,7 +333,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R7 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -372,7 +362,6 @@ }, .fixup_map_hash_8b = { 4 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -400,7 +389,5 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, - .result_unpriv = REJECT, }, diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c index 6f610cfddae5..1f82021429bf 100644 --- a/tools/testing/selftests/bpf/verifier/map_ptr.c +++ b/tools/testing/selftests/bpf/verifier/map_ptr.c @@ -76,7 +76,7 @@ }, .fixup_map_hash_16b = { 4 }, .result_unpriv = REJECT, - .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types", + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .result = ACCEPT, }, { @@ -94,6 +94,6 @@ }, .fixup_map_hash_16b = { 4 }, .result_unpriv = REJECT, - .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types", + .errstr_unpriv = "R0 has pointer with unsupported alu operation", .result = ACCEPT, }, diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c index 3e32400c4b44..bd436df5cc32 100644 --- a/tools/testing/selftests/bpf/verifier/unpriv.c +++ b/tools/testing/selftests/bpf/verifier/unpriv.c @@ -505,7 +505,7 @@ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types", + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", .result_unpriv = REJECT, .result = ACCEPT, }, diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index feb91266db39..e5913fd3b903 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -21,8 +21,6 @@ .fixup_map_hash_16b = { 5 }, .fixup_map_array_48b = { 8 }, .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R1 tried to add from different maps", .retval = 1, }, { @@ -122,7 +120,7 @@ .fixup_map_array_48b = { 1 }, .result = ACCEPT, .result_unpriv = REJECT, - .errstr_unpriv = "R2 tried to add from different pointers or scalars", + .errstr_unpriv = "R2 tried to add from different maps, paths or scalars", .retval = 0, }, { @@ -169,7 +167,7 @@ .fixup_map_array_48b = { 1 }, .result = ACCEPT, .result_unpriv = REJECT, - .errstr_unpriv = "R2 tried to add from different maps, paths, or prohibited types", + .errstr_unpriv = "R2 tried to add from different maps, paths or scalars", .retval = 0, }, { From 6b389c16378a03fe71f3b1365b593ba41d2dd8ec Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Thu, 15 Apr 2021 23:18:13 -0500 Subject: [PATCH 132/182] MAINTAINERS: update my email Update my email and change myself to Reviewer. Signed-off-by: Lijun Pan Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 163264c282eb..a31f76c41aec 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8517,9 +8517,9 @@ F: drivers/pci/hotplug/rpaphp* IBM Power SRIOV Virtual NIC Device Driver M: Dany Madden -M: Lijun Pan M: Sukadev Bhattiprolu R: Thomas Falcon +R: Lijun Pan L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/ibm/ibmvnic.* From 845be1cd34464620861b457b808e5fb2115d06b0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 16 Apr 2021 15:45:54 -0700 Subject: [PATCH 133/182] mm: eliminate "expecting prototype" kernel-doc warnings Fix stray kernel-doc warnings in mm/ due to mis-typed or missing function names. Quietens these kernel-doc warnings: mm/mmu_gather.c:264: warning: expecting prototype for tlb_gather_mmu(). Prototype was for __tlb_gather_mmu() instead mm/oom_kill.c:180: warning: expecting prototype for Check whether unreclaimable slab amount is greater than(). Prototype was for should_dump_unreclaim_slab() instead mm/shuffle.c:155: warning: expecting prototype for shuffle_free_memory(). Prototype was for __shuffle_free_memory() instead Link: https://lkml.kernel.org/r/20210411210642.11362-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmu_gather.c | 29 +++++++++++++++++++---------- mm/oom_kill.c | 2 +- mm/shuffle.c | 4 ++-- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 0dc7149b0c61..1b9837419bf9 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -249,16 +249,6 @@ void tlb_flush_mmu(struct mmu_gather *tlb) tlb_flush_mmu_free(tlb); } -/** - * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down - * @tlb: the mmu_gather structure to initialize - * @mm: the mm_struct of the target address space - * @fullmm: @mm is without users and we're going to destroy the full address - * space (exit/execve) - * - * Called to initialize an (on-stack) mmu_gather structure for page-table - * tear-down from @mm. - */ static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm) { @@ -283,11 +273,30 @@ static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, inc_tlb_flush_pending(tlb->mm); } +/** + * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down + * @tlb: the mmu_gather structure to initialize + * @mm: the mm_struct of the target address space + * + * Called to initialize an (on-stack) mmu_gather structure for page-table + * tear-down from @mm. + */ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm) { __tlb_gather_mmu(tlb, mm, false); } +/** + * tlb_gather_mmu_fullmm - initialize an mmu_gather structure for page-table tear-down + * @tlb: the mmu_gather structure to initialize + * @mm: the mm_struct of the target address space + * + * In this case, @mm is without users and we're going to destroy the + * full address space (exit/execve). + * + * Called to initialize an (on-stack) mmu_gather structure for page-table + * tear-down from @mm. + */ void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm) { __tlb_gather_mmu(tlb, mm, true); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 9efaf430cfd3..fa1cf18bac97 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -170,7 +170,7 @@ static bool oom_unkillable_task(struct task_struct *p) return false; } -/** +/* * Check whether unreclaimable slab amount is greater than * all user memory(LRU pages). * dump_unreclaimable_slab() could help in the case that diff --git a/mm/shuffle.c b/mm/shuffle.c index 9c2e145a747a..c13c33b247e8 100644 --- a/mm/shuffle.c +++ b/mm/shuffle.c @@ -147,8 +147,8 @@ void __meminit __shuffle_zone(struct zone *z) spin_unlock_irqrestore(&z->lock, flags); } -/** - * shuffle_free_memory - reduce the predictability of the page allocator +/* + * __shuffle_free_memory - reduce the predictability of the page allocator * @pgdat: node page data */ void __meminit __shuffle_free_memory(pg_data_t *pgdat) From 5c595ac4c776c44b5c59de22ab43b3fe256d9fbb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 16 Apr 2021 15:45:57 -0700 Subject: [PATCH 134/182] kasan: fix hwasan build for gcc gcc-11 adds support for -fsanitize=kernel-hwaddress, so it becomes possible to enable CONFIG_KASAN_SW_TAGS. Unfortunately this fails to build at the moment, because the corresponding command line arguments use llvm specific syntax. Change it to use the cc-param macro instead, which works on both clang and gcc. [elver@google.com: fixup for "kasan: fix hwasan build for gcc"] Link: https://lkml.kernel.org/r/YHQZVfVVLE/LDK2v@elver.google.com Link: https://lkml.kernel.org/r/20210323124112.1229772-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Marco Elver Reviewed-by: Marco Elver Acked-by: Andrey Konovalov Cc: Masahiro Yamada Cc: Michal Marek Cc: Andrey Ryabinin Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/Makefile.kasan | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 1e000cc2e7b4..127012f45166 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -2,6 +2,8 @@ CFLAGS_KASAN_NOSANITIZE := -fno-builtin KASAN_SHADOW_OFFSET ?= $(CONFIG_KASAN_SHADOW_OFFSET) +cc-param = $(call cc-option, -mllvm -$(1), $(call cc-option, --param $(1))) + ifdef CONFIG_KASAN_GENERIC ifdef CONFIG_KASAN_INLINE @@ -12,8 +14,6 @@ endif CFLAGS_KASAN_MINIMAL := -fsanitize=kernel-address -cc-param = $(call cc-option, -mllvm -$(1), $(call cc-option, --param $(1))) - # -fasan-shadow-offset fails without -fsanitize CFLAGS_KASAN_SHADOW := $(call cc-option, -fsanitize=kernel-address \ -fasan-shadow-offset=$(KASAN_SHADOW_OFFSET), \ @@ -36,14 +36,14 @@ endif # CONFIG_KASAN_GENERIC ifdef CONFIG_KASAN_SW_TAGS ifdef CONFIG_KASAN_INLINE - instrumentation_flags := -mllvm -hwasan-mapping-offset=$(KASAN_SHADOW_OFFSET) + instrumentation_flags := $(call cc-param,hwasan-mapping-offset=$(KASAN_SHADOW_OFFSET)) else - instrumentation_flags := -mllvm -hwasan-instrument-with-calls=1 + instrumentation_flags := $(call cc-param,hwasan-instrument-with-calls=1) endif CFLAGS_KASAN := -fsanitize=kernel-hwaddress \ - -mllvm -hwasan-instrument-stack=$(CONFIG_KASAN_STACK) \ - -mllvm -hwasan-use-short-granules=0 \ + $(call cc-param,hwasan-instrument-stack=$(CONFIG_KASAN_STACK)) \ + $(call cc-param,hwasan-use-short-granules=0) \ $(instrumentation_flags) endif # CONFIG_KASAN_SW_TAGS From 02c587733c8161355a43e6e110c2e29bd0acff72 Mon Sep 17 00:00:00 2001 From: Walter Wu Date: Fri, 16 Apr 2021 15:46:00 -0700 Subject: [PATCH 135/182] kasan: remove redundant config option CONFIG_KASAN_STACK and CONFIG_KASAN_STACK_ENABLE both enable KASAN stack instrumentation, but we should only need one config, so that we remove CONFIG_KASAN_STACK_ENABLE and make CONFIG_KASAN_STACK workable. see [1]. When enable KASAN stack instrumentation, then for gcc we could do no prompt and default value y, and for clang prompt and default value n. This patch fixes the following compilation warning: include/linux/kasan.h:333:30: warning: 'CONFIG_KASAN_STACK' is not defined, evaluates to 0 [-Wundef] [akpm@linux-foundation.org: fix merge snafu] Link: https://bugzilla.kernel.org/show_bug.cgi?id=210221 [1] Link: https://lkml.kernel.org/r/20210226012531.29231-1-walter-zh.wu@mediatek.com Fixes: d9b571c885a8 ("kasan: fix KASAN_STACK dependency for HW_TAGS") Signed-off-by: Walter Wu Suggested-by: Dmitry Vyukov Reviewed-by: Nathan Chancellor Acked-by: Arnd Bergmann Reviewed-by: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Alexander Potapenko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/kernel/sleep.S | 2 +- arch/x86/kernel/acpi/wakeup_64.S | 2 +- include/linux/kasan.h | 2 +- lib/Kconfig.kasan | 9 ++------- mm/kasan/common.c | 2 +- mm/kasan/kasan.h | 2 +- mm/kasan/report_generic.c | 2 +- scripts/Makefile.kasan | 10 ++++++++-- security/Kconfig.hardening | 4 ++-- 9 files changed, 18 insertions(+), 17 deletions(-) diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 5bfd9b87f85d..4ea9392f86e0 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -134,7 +134,7 @@ SYM_FUNC_START(_cpu_resume) */ bl cpu_do_resume -#if defined(CONFIG_KASAN) && CONFIG_KASAN_STACK +#if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK) mov x0, sp bl kasan_unpoison_task_stack_below #endif diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index 56b6865afb2a..d5d8a352eafa 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -115,7 +115,7 @@ SYM_FUNC_START(do_suspend_lowlevel) movq pt_regs_r14(%rax), %r14 movq pt_regs_r15(%rax), %r15 -#if defined(CONFIG_KASAN) && CONFIG_KASAN_STACK +#if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK) /* * The suspend path may have poisoned some areas deeper in the stack, * which we now need to unpoison. diff --git a/include/linux/kasan.h b/include/linux/kasan.h index b91732bd05d7..14f72ec96492 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -330,7 +330,7 @@ static inline bool kasan_check_byte(const void *address) #endif /* CONFIG_KASAN */ -#if defined(CONFIG_KASAN) && CONFIG_KASAN_STACK +#if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK) void kasan_unpoison_task_stack(struct task_struct *task); #else static inline void kasan_unpoison_task_stack(struct task_struct *task) {} diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index fba9909e31b7..cffc2ebbf185 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -138,9 +138,10 @@ config KASAN_INLINE endchoice -config KASAN_STACK_ENABLE +config KASAN_STACK bool "Enable stack instrumentation (unsafe)" if CC_IS_CLANG && !COMPILE_TEST depends on KASAN_GENERIC || KASAN_SW_TAGS + default y if CC_IS_GCC help The LLVM stack address sanitizer has a know problem that causes excessive stack usage in a lot of functions, see @@ -154,12 +155,6 @@ config KASAN_STACK_ENABLE CONFIG_COMPILE_TEST. On gcc it is assumed to always be safe to use and enabled by default. -config KASAN_STACK - int - depends on KASAN_GENERIC || KASAN_SW_TAGS - default 1 if KASAN_STACK_ENABLE || CC_IS_GCC - default 0 - config KASAN_SW_TAGS_IDENTIFY bool "Enable memory corruption identification" depends on KASAN_SW_TAGS diff --git a/mm/kasan/common.c b/mm/kasan/common.c index b5e08d4cefec..7b53291dafa1 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -63,7 +63,7 @@ void __kasan_unpoison_range(const void *address, size_t size) kasan_unpoison(address, size); } -#if CONFIG_KASAN_STACK +#ifdef CONFIG_KASAN_STACK /* Unpoison the entire stack for a task. */ void kasan_unpoison_task_stack(struct task_struct *task) { diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 8c55634d6edd..3436c6bf7c0c 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -231,7 +231,7 @@ void *kasan_find_first_bad_addr(void *addr, size_t size); const char *kasan_get_bug_type(struct kasan_access_info *info); void kasan_metadata_fetch_row(char *buffer, void *row); -#if defined(CONFIG_KASAN_GENERIC) && CONFIG_KASAN_STACK +#if defined(CONFIG_KASAN_GENERIC) && defined(CONFIG_KASAN_STACK) void kasan_print_address_stack_frame(const void *addr); #else static inline void kasan_print_address_stack_frame(const void *addr) { } diff --git a/mm/kasan/report_generic.c b/mm/kasan/report_generic.c index 41f374585144..de732bc341c5 100644 --- a/mm/kasan/report_generic.c +++ b/mm/kasan/report_generic.c @@ -128,7 +128,7 @@ void kasan_metadata_fetch_row(char *buffer, void *row) memcpy(buffer, kasan_mem_to_shadow(row), META_BYTES_PER_ROW); } -#if CONFIG_KASAN_STACK +#ifdef CONFIG_KASAN_STACK static bool __must_check tokenize_frame_descr(const char **frame_descr, char *token, size_t max_tok_len, unsigned long *value) diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 127012f45166..3d791908ed36 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -4,6 +4,12 @@ KASAN_SHADOW_OFFSET ?= $(CONFIG_KASAN_SHADOW_OFFSET) cc-param = $(call cc-option, -mllvm -$(1), $(call cc-option, --param $(1))) +ifdef CONFIG_KASAN_STACK + stack_enable := 1 +else + stack_enable := 0 +endif + ifdef CONFIG_KASAN_GENERIC ifdef CONFIG_KASAN_INLINE @@ -27,7 +33,7 @@ else CFLAGS_KASAN := $(CFLAGS_KASAN_SHADOW) \ $(call cc-param,asan-globals=1) \ $(call cc-param,asan-instrumentation-with-call-threshold=$(call_threshold)) \ - $(call cc-param,asan-stack=$(CONFIG_KASAN_STACK)) \ + $(call cc-param,asan-stack=$(stack_enable)) \ $(call cc-param,asan-instrument-allocas=1) endif @@ -42,7 +48,7 @@ else endif CFLAGS_KASAN := -fsanitize=kernel-hwaddress \ - $(call cc-param,hwasan-instrument-stack=$(CONFIG_KASAN_STACK)) \ + $(call cc-param,hwasan-instrument-stack=$(stack_enable)) \ $(call cc-param,hwasan-use-short-granules=0) \ $(instrumentation_flags) diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index 269967c4fc1b..a56c36470cb1 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -64,7 +64,7 @@ choice config GCC_PLUGIN_STRUCTLEAK_BYREF bool "zero-init structs passed by reference (strong)" depends on GCC_PLUGINS - depends on !(KASAN && KASAN_STACK=1) + depends on !(KASAN && KASAN_STACK) select GCC_PLUGIN_STRUCTLEAK help Zero-initialize any structures on the stack that may @@ -82,7 +82,7 @@ choice config GCC_PLUGIN_STRUCTLEAK_BYREF_ALL bool "zero-init anything passed by reference (very strong)" depends on GCC_PLUGINS - depends on !(KASAN && KASAN_STACK=1) + depends on !(KASAN && KASAN_STACK) select GCC_PLUGIN_STRUCTLEAK help Zero-initialize any stack variables that may be passed From d199161653d612b8fb96ac51bfd5b2d2782ecef3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 16 Apr 2021 15:46:03 -0700 Subject: [PATCH 136/182] csky: change a Kconfig symbol name to fix e1000 build error e1000's #define of CONFIG_RAM_BASE conflicts with a Kconfig symbol in arch/csky/Kconfig. The symbol in e1000 has been around longer, so change arch/csky/ to use DRAM_BASE instead of RAM_BASE to remove the conflict. (although e1000 is also a 2-line change) Link: https://lkml.kernel.org/r/20210411055335.7111-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Reported-by: kernel test robot Acked-by: Guo Ren Cc: Jesse Brandeburg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/csky/Kconfig | 2 +- arch/csky/include/asm/page.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 34e91224adc3..8de5b987edb9 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -314,7 +314,7 @@ config FORCE_MAX_ZONEORDER int "Maximum zone order" default "11" -config RAM_BASE +config DRAM_BASE hex "DRAM start addr (the same with memory-section in dts)" default 0x0 diff --git a/arch/csky/include/asm/page.h b/arch/csky/include/asm/page.h index 3b91fc3cf36f..ed7451478b1b 100644 --- a/arch/csky/include/asm/page.h +++ b/arch/csky/include/asm/page.h @@ -28,7 +28,7 @@ #define SSEG_SIZE 0x20000000 #define LOWMEM_LIMIT (SSEG_SIZE * 2) -#define PHYS_OFFSET_OFFSET (CONFIG_RAM_BASE & (SSEG_SIZE - 1)) +#define PHYS_OFFSET_OFFSET (CONFIG_DRAM_BASE & (SSEG_SIZE - 1)) #ifndef __ASSEMBLY__ From 19d000d93303e05bd7b1326e3de9df05a41b25b5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 16 Apr 2021 15:46:06 -0700 Subject: [PATCH 137/182] ia64: remove duplicate entries in generic_defconfig Fix ia64 generic_defconfig duplicate entries, as warned by: arch/ia64/configs/generic_defconfig: warning: override: reassigning to symbol ATA: => 58 arch/ia64/configs/generic_defconfig: warning: override: reassigning to symbol ATA_PIIX: => 59 These 2 symbols still have the same value as in the removed lines. Link: https://lkml.kernel.org/r/20210411020255.18052-1-rdunlap@infradead.org Fixes: c331649e6371 ("ia64: Use libata instead of the legacy ide driver in defconfigs") Signed-off-by: Randy Dunlap Reported-by: Geert Uytterhoeven Reviewed-by: Christoph Hellwig Cc: Tony Luck Cc: Fenghua Yu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/configs/generic_defconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig index ca0d596c800d..8916a2850c48 100644 --- a/arch/ia64/configs/generic_defconfig +++ b/arch/ia64/configs/generic_defconfig @@ -55,8 +55,6 @@ CONFIG_CHR_DEV_SG=m CONFIG_SCSI_FC_ATTRS=y CONFIG_SCSI_SYM53C8XX_2=y CONFIG_SCSI_QLOGIC_1280=y -CONFIG_ATA=y -CONFIG_ATA_PIIX=y CONFIG_SATA_VITESSE=y CONFIG_MD=y CONFIG_BLK_DEV_MD=m From e2af9da4f867a1a54f1252bf3abc1a5c63951778 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 16 Apr 2021 15:46:09 -0700 Subject: [PATCH 138/182] ia64: fix discontig.c section mismatches Fix IA64 discontig.c Section mismatch warnings. When CONFIG_SPARSEMEM=y and CONFIG_MEMORY_HOTPLUG=y, the functions computer_pernodesize() and scatter_node_data() should not be marked as __meminit because they are needed after init, on any memory hotplug event. Also, early_nr_cpus_node() is called by compute_pernodesize(), so early_nr_cpus_node() cannot be __meminit either. WARNING: modpost: vmlinux.o(.text.unlikely+0x1612): Section mismatch in reference from the function arch_alloc_nodedata() to the function .meminit.text:compute_pernodesize() The function arch_alloc_nodedata() references the function __meminit compute_pernodesize(). This is often because arch_alloc_nodedata lacks a __meminit annotation or the annotation of compute_pernodesize is wrong. WARNING: modpost: vmlinux.o(.text.unlikely+0x1692): Section mismatch in reference from the function arch_refresh_nodedata() to the function .meminit.text:scatter_node_data() The function arch_refresh_nodedata() references the function __meminit scatter_node_data(). This is often because arch_refresh_nodedata lacks a __meminit annotation or the annotation of scatter_node_data is wrong. WARNING: modpost: vmlinux.o(.text.unlikely+0x1502): Section mismatch in reference from the function compute_pernodesize() to the function .meminit.text:early_nr_cpus_node() The function compute_pernodesize() references the function __meminit early_nr_cpus_node(). This is often because compute_pernodesize lacks a __meminit annotation or the annotation of early_nr_cpus_node is wrong. Link: https://lkml.kernel.org/r/20210411001201.3069-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/mm/discontig.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 03b3a02375ff..c310b4c99fb3 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -95,7 +95,7 @@ static int __init build_node_maps(unsigned long start, unsigned long len, * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been * called yet. Note that node 0 will also count all non-existent cpus. */ -static int __meminit early_nr_cpus_node(int node) +static int early_nr_cpus_node(int node) { int cpu, n = 0; @@ -110,7 +110,7 @@ static int __meminit early_nr_cpus_node(int node) * compute_pernodesize - compute size of pernode data * @node: the node id. */ -static unsigned long __meminit compute_pernodesize(int node) +static unsigned long compute_pernodesize(int node) { unsigned long pernodesize = 0, cpus; @@ -367,7 +367,7 @@ static void __init reserve_pernode_space(void) } } -static void __meminit scatter_node_data(void) +static void scatter_node_data(void) { pg_data_t **dst; int node; From 17786fea414393813b56e33a1a01b2dfa03c0915 Mon Sep 17 00:00:00 2001 From: John Paul Adrian Glaubitz Date: Fri, 16 Apr 2021 15:46:12 -0700 Subject: [PATCH 139/182] ia64: tools: remove inclusion of ia64-specific version of errno.h header There is no longer an ia64-specific version of the errno.h header below arch/ia64/include/uapi/asm/, so trying to build tools/bpf fails with: CC /usr/src/linux/tools/bpf/bpftool/btf_dumper.o In file included from /usr/src/linux/tools/include/linux/err.h:8, from btf_dumper.c:11: /usr/src/linux/tools/include/uapi/asm/errno.h:13:10: fatal error: ../../../arch/ia64/include/uapi/asm/errno.h: No such file or directory 13 | #include "../../../arch/ia64/include/uapi/asm/errno.h" | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compilation terminated. Thus, just remove the inclusion of the ia64-specific errno.h so that the build will use the generic errno.h header on this target which was used there anyway as the ia64-specific errno.h was just a wrapper for the generic header. Fixes: c25f867ddd00 ("ia64: remove unneeded uapi asm-generic wrappers") Signed-off-by: John Paul Adrian Glaubitz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/include/uapi/asm/errno.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/include/uapi/asm/errno.h b/tools/include/uapi/asm/errno.h index 637189ec1ab9..d30439b4b8ab 100644 --- a/tools/include/uapi/asm/errno.h +++ b/tools/include/uapi/asm/errno.h @@ -9,8 +9,6 @@ #include "../../../arch/alpha/include/uapi/asm/errno.h" #elif defined(__mips__) #include "../../../arch/mips/include/uapi/asm/errno.h" -#elif defined(__ia64__) -#include "../../../arch/ia64/include/uapi/asm/errno.h" #elif defined(__xtensa__) #include "../../../arch/xtensa/include/uapi/asm/errno.h" #else From f4bf09dc3aaa4b07cd15630f2023f68cb2668809 Mon Sep 17 00:00:00 2001 From: John Paul Adrian Glaubitz Date: Fri, 16 Apr 2021 15:46:15 -0700 Subject: [PATCH 140/182] ia64: tools: remove duplicate definition of ia64_mf() on ia64 The ia64_mf() macro defined in tools/arch/ia64/include/asm/barrier.h is already defined in on ia64 which causes libbpf failing to build: CC /usr/src/linux/tools/bpf/bpftool//libbpf/staticobjs/libbpf.o In file included from /usr/src/linux/tools/include/asm/barrier.h:24, from /usr/src/linux/tools/include/linux/ring_buffer.h:4, from libbpf.c:37: /usr/src/linux/tools/include/asm/../../arch/ia64/include/asm/barrier.h:43: error: "ia64_mf" redefined [-Werror] 43 | #define ia64_mf() asm volatile ("mf" ::: "memory") | In file included from /usr/include/ia64-linux-gnu/asm/intrinsics.h:20, from /usr/include/ia64-linux-gnu/asm/swab.h:11, from /usr/include/linux/swab.h:8, from /usr/include/linux/byteorder/little_endian.h:13, from /usr/include/ia64-linux-gnu/asm/byteorder.h:5, from /usr/src/linux/tools/include/uapi/linux/perf_event.h:20, from libbpf.c:36: /usr/include/ia64-linux-gnu/asm/gcc_intrin.h:382: note: this is the location of the previous definition 382 | #define ia64_mf() __asm__ volatile ("mf" ::: "memory") | cc1: all warnings being treated as errors Thus, remove the definition from tools/arch/ia64/include/asm/barrier.h. Signed-off-by: John Paul Adrian Glaubitz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/arch/ia64/include/asm/barrier.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/arch/ia64/include/asm/barrier.h b/tools/arch/ia64/include/asm/barrier.h index 4d471d9511a5..6fffe5682713 100644 --- a/tools/arch/ia64/include/asm/barrier.h +++ b/tools/arch/ia64/include/asm/barrier.h @@ -39,9 +39,6 @@ * sequential memory pages only. */ -/* XXX From arch/ia64/include/uapi/asm/gcc_intrin.h */ -#define ia64_mf() asm volatile ("mf" ::: "memory") - #define mb() ia64_mf() #define rmb() mb() #define wmb() mb() From 94036f4c884377bdf2da1ba7666c9599d6df0191 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 16 Apr 2021 15:46:18 -0700 Subject: [PATCH 141/182] mm/mapping_dirty_helpers: guard hugepage pud's usage Mapping dirty helpers have, so far, been only used on X86, but a port of vmwgfx to ARM64 exposed a problem which results in a compilation error on ARM64 systems: mm/mapping_dirty_helpers.c: In function `wp_clean_pud_entry': mm/mapping_dirty_helpers.c:172:32: error: implicit declaration of function `pud_dirty'; did you mean `pmd_dirty'? [-Werror=implicit-function-declaration] This is due to the fact that mapping_dirty_helpers code assumes that pud_dirty is always defined, which is not the case for architectures that don't define CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD. ARM64 arch is a little inconsistent when it comes to PUD hugepage helpers, e.g. it defines pud_young but not pud_dirty but regardless of that the core kernel code shouldn't assume that any of the PUD hugepage helpers are available unless CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD is defined. This prevents compilation errors whenever one of the drivers is ported to new architectures. Link: https://lkml.kernel.org/r/20210409165151.694574-1-zackr@vmware.com Signed-off-by: Zack Rusin Reviewed-by: Thomas Hellstrm (Intel) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mapping_dirty_helpers.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/mapping_dirty_helpers.c b/mm/mapping_dirty_helpers.c index b59054ef2e10..b890854ec761 100644 --- a/mm/mapping_dirty_helpers.c +++ b/mm/mapping_dirty_helpers.c @@ -165,10 +165,12 @@ static int wp_clean_pud_entry(pud_t *pud, unsigned long addr, unsigned long end, return 0; } +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD /* Huge pud */ walk->action = ACTION_CONTINUE; if (pud_trans_huge(pudval) || pud_devmap(pudval)) WARN_ON(pud_write(pudval) || pud_dirty(pudval)); +#endif return 0; } From 458376913d86bed2fb781b4952eb6861675ef3be Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 16 Apr 2021 15:46:20 -0700 Subject: [PATCH 142/182] mm: ptdump: fix build failure READ_ONCE() cannot be used for reading PTEs. Use ptep_get() instead, to avoid the following errors: CC mm/ptdump.o In file included from : mm/ptdump.c: In function 'ptdump_pte_entry': include/linux/compiler_types.h:320:38: error: call to '__compiletime_assert_207' declared with attribute error: Unsupported access size for {READ,WRITE}_ONCE(). 320 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) | ^ include/linux/compiler_types.h:301:4: note: in definition of macro '__compiletime_assert' 301 | prefix ## suffix(); \ | ^~~~~~ include/linux/compiler_types.h:320:2: note: in expansion of macro '_compiletime_assert' 320 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) | ^~~~~~~~~~~~~~~~~~~ include/asm-generic/rwonce.h:36:2: note: in expansion of macro 'compiletime_assert' 36 | compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ | ^~~~~~~~~~~~~~~~~~ include/asm-generic/rwonce.h:49:2: note: in expansion of macro 'compiletime_assert_rwonce_type' 49 | compiletime_assert_rwonce_type(x); \ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mm/ptdump.c:114:14: note: in expansion of macro 'READ_ONCE' 114 | pte_t val = READ_ONCE(*pte); | ^~~~~~~~~ make[2]: *** [mm/ptdump.o] Error 1 See commit 481e980a7c19 ("mm: Allow arches to provide ptep_get()") and commit c0e1c8c22beb ("powerpc/8xx: Provide ptep_get() with 16k pages") for details. Link: https://lkml.kernel.org/r/912b349e2bcaa88939904815ca0af945740c6bd4.1618478922.git.christophe.leroy@csgroup.eu Fixes: 30d621f6723b ("mm: add generic ptdump") Signed-off-by: Christophe Leroy Cc: Steven Price Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/ptdump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/ptdump.c b/mm/ptdump.c index 4354c1422d57..da751448d0e4 100644 --- a/mm/ptdump.c +++ b/mm/ptdump.c @@ -111,7 +111,7 @@ static int ptdump_pte_entry(pte_t *pte, unsigned long addr, unsigned long next, struct mm_walk *walk) { struct ptdump_state *st = walk->private; - pte_t val = READ_ONCE(*pte); + pte_t val = ptep_get(pte); if (st->effective_prot) st->effective_prot(st, 4, pte_val(val)); From 04c53de57cb6435738961dace8b1b71d3ecd3c39 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Apr 2021 15:46:23 -0700 Subject: [PATCH 143/182] gcov: clang: fix clang-11+ build With clang-11+, the code is broken due to my kvmalloc() conversion (which predated the clang-11 support code) leaving one vmalloc() in place. Fix that. Link: https://lkml.kernel.org/r/20210412214210.6e1ecca9cdc5.I24459763acf0591d5e6b31c7e3a59890d802f79c@changeid Signed-off-by: Johannes Berg Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/gcov/clang.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/gcov/clang.c b/kernel/gcov/clang.c index c466c7fbdece..b81f2823630d 100644 --- a/kernel/gcov/clang.c +++ b/kernel/gcov/clang.c @@ -369,7 +369,7 @@ static struct gcov_fn_info *gcov_fn_info_dup(struct gcov_fn_info *fn) INIT_LIST_HEAD(&fn_dup->head); cv_size = fn->num_counters * sizeof(fn->counters[0]); - fn_dup->counters = vmalloc(cv_size); + fn_dup->counters = kvmalloc(cv_size, GFP_KERNEL); if (!fn_dup->counters) { kfree(fn_dup); return NULL; From c95c2d328cd051484bea161e66dfa715c02a7d7e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 16 Apr 2021 15:46:26 -0700 Subject: [PATCH 144/182] lib: remove "expecting prototype" kernel-doc warnings Fix various kernel-doc warnings in lib/ due to missing or erroneous function names. Add kernel-doc for some function parameters that was missing. Use kernel-doc "Return:" notation in earlycpio.c. Quietens the following warnings: lib/earlycpio.c:61: warning: expecting prototype for cpio_data find_cpio_data(). Prototype was for find_cpio_data() instead lib/lru_cache.c:640: warning: expecting prototype for lc_dump(). Prototype was for lc_seq_dump_details() instead lru_cache.c:90: warning: Function parameter or member 'cache' not described in 'lc_create' lib/parman.c:368: warning: expecting prototype for parman_item_del(). Prototype was for parman_item_remove() instead parman.c:309: warning: Excess function parameter 'prority' description in 'parman_prio_init' lib/radix-tree.c:703: warning: expecting prototype for __radix_tree_insert(). Prototype was for radix_tree_insert() instead radix-tree.c:180: warning: Excess function parameter 'addr' description in 'radix_tree_find_next_bit' radix-tree.c:180: warning: Excess function parameter 'size' description in 'radix_tree_find_next_bit' radix-tree.c:931: warning: Function parameter or member 'iter' not described in 'radix_tree_iter_replace' Link: https://lkml.kernel.org/r/20210411221756.15461-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Cc: Philipp Reisner Cc: Lars Ellenberg Cc: Jiri Pirko Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/earlycpio.c | 4 ++-- lib/lru_cache.c | 3 ++- lib/parman.c | 4 ++-- lib/radix-tree.c | 11 ++++++----- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/lib/earlycpio.c b/lib/earlycpio.c index e83628882001..7921193f0424 100644 --- a/lib/earlycpio.c +++ b/lib/earlycpio.c @@ -40,7 +40,7 @@ enum cpio_fields { }; /** - * cpio_data find_cpio_data - Search for files in an uncompressed cpio + * find_cpio_data - Search for files in an uncompressed cpio * @path: The directory to search for, including a slash at the end * @data: Pointer to the cpio archive or a header inside * @len: Remaining length of the cpio based on data pointer @@ -49,7 +49,7 @@ enum cpio_fields { * matching file itself. It can be used to iterate through the cpio * to find all files inside of a directory path. * - * @return: struct cpio_data containing the address, length and + * Return: &struct cpio_data containing the address, length and * filename (with the directory path cut off) of the found file. * If you search for a filename and not for files in a directory, * pass the absolute path of the filename in the cpio and make sure diff --git a/lib/lru_cache.c b/lib/lru_cache.c index c69ee53d8dde..52313acbfa62 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -76,6 +76,7 @@ int lc_try_lock(struct lru_cache *lc) /** * lc_create - prepares to track objects in an active set * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details + * @cache: cache root pointer * @max_pending_changes: maximum changes to accumulate until a transaction is required * @e_count: number of elements allowed to be active simultaneously * @e_size: size of the tracked objects @@ -627,7 +628,7 @@ void lc_set(struct lru_cache *lc, unsigned int enr, int index) } /** - * lc_dump - Dump a complete LRU cache to seq in textual form. + * lc_seq_dump_details - Dump a complete LRU cache to seq in textual form. * @lc: the lru cache to operate on * @seq: the &struct seq_file pointer to seq_printf into * @utext: user supplied additional "heading" or other info diff --git a/lib/parman.c b/lib/parman.c index a11f2f667639..3f8f8d422e62 100644 --- a/lib/parman.c +++ b/lib/parman.c @@ -297,7 +297,7 @@ EXPORT_SYMBOL(parman_destroy); * parman_prio_init - initializes a parman priority chunk * @parman: parman instance * @prio: parman prio structure to be initialized - * @prority: desired priority of the chunk + * @priority: desired priority of the chunk * * Note: all locking must be provided by the caller. * @@ -356,7 +356,7 @@ int parman_item_add(struct parman *parman, struct parman_prio *prio, EXPORT_SYMBOL(parman_item_add); /** - * parman_item_del - deletes parman item + * parman_item_remove - deletes parman item * @parman: parman instance * @prio: parman prio instance to delete the item from * @item: parman item instance diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 3a4da11b804d..b3afafe46fff 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -166,9 +166,9 @@ static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag) /** * radix_tree_find_next_bit - find the next set bit in a memory region * - * @addr: The address to base the search on - * @size: The bitmap size in bits - * @offset: The bitnumber to start searching at + * @node: where to begin the search + * @tag: the tag index + * @offset: the bitnumber to start searching at * * Unrollable variant of find_next_bit() for constant size arrays. * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero. @@ -461,7 +461,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, /** * radix_tree_shrink - shrink radix tree to minimum height - * @root radix tree root + * @root: radix tree root */ static inline bool radix_tree_shrink(struct radix_tree_root *root) { @@ -691,7 +691,7 @@ static inline int insert_entries(struct radix_tree_node *node, } /** - * __radix_tree_insert - insert into a radix tree + * radix_tree_insert - insert into a radix tree * @root: radix tree root * @index: index key * @item: item to insert @@ -919,6 +919,7 @@ EXPORT_SYMBOL(radix_tree_replace_slot); /** * radix_tree_iter_replace - replace item in a slot * @root: radix tree root + * @iter: iterator state * @slot: pointer to slot * @item: new item to store in the slot. * From f2764bd4f6a8dffaec3e220728385d9756b3c2cb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 16 Apr 2021 21:29:13 +0200 Subject: [PATCH 145/182] netlink: don't call ->netlink_bind with table lock held When I added support to allow generic netlink multicast groups to be restricted to subscribers with CAP_NET_ADMIN I was unaware that a genl_bind implementation already existed in the past. It was reverted due to ABBA deadlock: 1. ->netlink_bind gets called with the table lock held. 2. genetlink bind callback is invoked, it grabs the genl lock. But when a new genl subsystem is (un)registered, these two locks are taken in reverse order. One solution would be to revert again and add a comment in genl referring 1e82a62fec613, "genetlink: remove genl_bind"). This would need a second change in mptcp to not expose the raw token value anymore, e.g. by hashing the token with a secret key so userspace can still associate subflow events with the correct mptcp connection. However, Paolo Abeni reminded me to double-check why the netlink table is locked in the first place. I can't find one. netlink_bind() is already called without this lock when userspace joins a group via NETLINK_ADD_MEMBERSHIP setsockopt. Same holds for the netlink_unbind operation. Digging through the history, commit f773608026ee1 ("netlink: access nlk groups safely in netlink bind and getname") expanded the lock scope. commit 3a20773beeeeade ("net: netlink: cap max groups which will be considered in netlink_bind()") ... removed the nlk->ngroups access that the lock scope extension was all about. Reduce the lock scope again and always call ->netlink_bind without the table lock. The Fixes tag should be vs. the patch mentioned in the link below, but that one got squash-merged into the patch that came earlier in the series. Fixes: 4d54cc32112d8d ("mptcp: avoid lock_fast usage in accept path") Link: https://lore.kernel.org/mptcp/20210213000001.379332-8-mathew.j.martineau@linux.intel.com/T/#u Cc: Cong Wang Cc: Xin Long Cc: Johannes Berg Cc: Sean Tranchetti Cc: Paolo Abeni Cc: Pablo Neira Ayuso Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index dd488938447f..3a62f97acf39 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1019,7 +1019,6 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return -EINVAL; } - netlink_lock_table(); if (nlk->netlink_bind && groups) { int group; @@ -1031,13 +1030,14 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (!err) continue; netlink_undo_bind(group, groups, sk); - goto unlock; + return err; } } /* No need for barriers here as we return to user-space without * using any of the bound attributes. */ + netlink_lock_table(); if (!bound) { err = nladdr->nl_pid ? netlink_insert(sk, nladdr->nl_pid) : From fae8817ae804a682c6823ad1672438f39fc46c28 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 16 Apr 2021 17:43:30 -0700 Subject: [PATCH 146/182] cxl/mem: Fix memory device capacity probing The CXL Identify Memory Device output payload emits capacity in 256MB units. The driver is treating the capacity field as bytes. This was missed because QEMU reports bytes when it should report bytes / 256MB. Fixes: 8adaf747c9f0 ("cxl/mem: Find device capabilities") Reviewed-by: Vishal Verma Cc: Ben Widawsky Link: https://lore.kernel.org/r/161862021044.3259705.7008520073059739760.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 1b5078311f7d..2acc6173da36 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -1419,6 +1420,7 @@ static int cxl_mem_enumerate_cmds(struct cxl_mem *cxlm) */ static int cxl_mem_identify(struct cxl_mem *cxlm) { + /* See CXL 2.0 Table 175 Identify Memory Device Output Payload */ struct cxl_mbox_identify { char fw_revision[0x10]; __le64 total_capacity; @@ -1447,10 +1449,11 @@ static int cxl_mem_identify(struct cxl_mem *cxlm) * For now, only the capacity is exported in sysfs */ cxlm->ram_range.start = 0; - cxlm->ram_range.end = le64_to_cpu(id.volatile_capacity) - 1; + cxlm->ram_range.end = le64_to_cpu(id.volatile_capacity) * SZ_256M - 1; cxlm->pmem_range.start = 0; - cxlm->pmem_range.end = le64_to_cpu(id.persistent_capacity) - 1; + cxlm->pmem_range.end = + le64_to_cpu(id.persistent_capacity) * SZ_256M - 1; memcpy(cxlm->firmware_version, id.fw_revision, sizeof(id.fw_revision)); From 84a24bf8c52e66b7ac89ada5e3cfbe72d65c1896 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Thu, 15 Apr 2021 17:27:11 +0000 Subject: [PATCH 147/182] locking/qrwlock: Fix ordering in queued_write_lock_slowpath() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While this code is executed with the wait_lock held, a reader can acquire the lock without holding wait_lock. The writer side loops checking the value with the atomic_cond_read_acquire(), but only truly acquires the lock when the compare-and-exchange is completed successfully which isn’t ordered. This exposes the window between the acquire and the cmpxchg to an A-B-A problem which allows reads following the lock acquisition to observe values speculatively before the write lock is truly acquired. We've seen a problem in epoll where the reader does a xchg while holding the read lock, but the writer can see a value change out from under it. Writer | Reader -------------------------------------------------------------------------------- ep_scan_ready_list() | |- write_lock_irq() | |- queued_write_lock_slowpath() | |- atomic_cond_read_acquire() | | read_lock_irqsave(&ep->lock, flags); --> (observes value before unlock) | chain_epi_lockless() | | epi->next = xchg(&ep->ovflist, epi); | | read_unlock_irqrestore(&ep->lock, flags); | | | atomic_cmpxchg_relaxed() | |-- READ_ONCE(ep->ovflist); | A core can order the read of the ovflist ahead of the atomic_cmpxchg_relaxed(). Switching the cmpxchg to use acquire semantics addresses this issue at which point the atomic_cond_read can be switched to use relaxed semantics. Fixes: b519b56e378ee ("locking/qrwlock: Use atomic_cond_read_acquire() when spinning in qrwlock") Signed-off-by: Ali Saidi [peterz: use try_cmpxchg()] Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steve Capper Acked-by: Will Deacon Acked-by: Waiman Long Tested-by: Steve Capper --- kernel/locking/qrwlock.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c index 4786dd271b45..b94f3831e963 100644 --- a/kernel/locking/qrwlock.c +++ b/kernel/locking/qrwlock.c @@ -60,6 +60,8 @@ EXPORT_SYMBOL(queued_read_lock_slowpath); */ void queued_write_lock_slowpath(struct qrwlock *lock) { + int cnts; + /* Put the writer into the wait queue */ arch_spin_lock(&lock->wait_lock); @@ -73,9 +75,8 @@ void queued_write_lock_slowpath(struct qrwlock *lock) /* When no more readers or writers, set the locked flag */ do { - atomic_cond_read_acquire(&lock->cnts, VAL == _QW_WAITING); - } while (atomic_cmpxchg_relaxed(&lock->cnts, _QW_WAITING, - _QW_LOCKED) != _QW_WAITING); + cnts = atomic_cond_read_relaxed(&lock->cnts, VAL == _QW_WAITING); + } while (!atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED)); unlock: arch_spin_unlock(&lock->wait_lock); } From 0c93ac69407d63a85be0129aa55ffaec27ffebd3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 17 Apr 2021 09:27:04 -0700 Subject: [PATCH 148/182] readdir: make sure to verify directory entry for legacy interfaces too This does the directory entry name verification for the legacy "fillonedir" (and compat) interface that goes all the way back to the dark ages before we had a proper dirent, and the readdir() system call returned just a single entry at a time. Nobody should use this interface unless you still have binaries from 1991, but let's do it right. This came up during discussions about unsafe_copy_to_user() and proper checking of all the inputs to it, as the networking layer is looking to use it in a few new places. So let's make sure the _old_ users do it all right and proper, before we add new ones. See also commit 8a23eb804ca4 ("Make filldir[64]() verify the directory entry filename is valid") which did the proper modern interfaces that people actually use. It had a note: Note that I didn't bother adding the checks to any legacy interfaces that nobody uses. which this now corrects. Note that we really don't care about POSIX and the presense of '/' in a directory entry, but verify_dirent_name() also ends up doing the proper name length verification which is what the input checking discussion was about. [ Another option would be to remove the support for this particular very old interface: any binaries that use it are likely a.out binaries, and they will no longer run anyway since we removed a.out binftm support in commit eac616557050 ("x86: Deprecate a.out support"). But I'm not sure which came first: getdents() or ELF support, so let's pretend somebody might still have a working binary that uses the legacy readdir() case.. ] Link: https://lore.kernel.org/lkml/CAHk-=wjbvzCAhAtvG0d81W5o0-KT5PPTHhfJ5ieDFq+bGtgOYg@mail.gmail.com/ Acked-by: Al Viro Signed-off-by: Linus Torvalds --- fs/readdir.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/readdir.c b/fs/readdir.c index 19434b3c982c..09e8ed7d4161 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -150,6 +150,9 @@ static int fillonedir(struct dir_context *ctx, const char *name, int namlen, if (buf->result) return -EINVAL; + buf->result = verify_dirent_name(name, namlen); + if (buf->result < 0) + return buf->result; d_ino = ino; if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { buf->result = -EOVERFLOW; @@ -405,6 +408,9 @@ static int compat_fillonedir(struct dir_context *ctx, const char *name, if (buf->result) return -EINVAL; + buf->result = verify_dirent_name(name, namlen); + if (buf->result < 0) + return buf->result; d_ino = ino; if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { buf->result = -EOVERFLOW; From d2f7eca60b29006285d57c7035539e33300e89e5 Mon Sep 17 00:00:00 2001 From: Fredrik Strupe Date: Mon, 5 Apr 2021 21:52:05 +0100 Subject: [PATCH 149/182] ARM: 9071/1: uprobes: Don't hook on thumb instructions Since uprobes is not supported for thumb, check that the thumb bit is not set when matching the uprobes instruction hooks. The Arm UDF instructions used for uprobes triggering (UPROBE_SWBP_ARM_INSN and UPROBE_SS_ARM_INSN) coincidentally share the same encoding as a pair of unallocated 32-bit thumb instructions (not UDF) when the condition code is 0b1111 (0xf). This in effect makes it possible to trigger the uprobes functionality from thumb, and at that using two unallocated instructions which are not permanently undefined. Signed-off-by: Fredrik Strupe Cc: stable@vger.kernel.org Fixes: c7edc9e326d5 ("ARM: add uprobes support") Signed-off-by: Russell King --- arch/arm/probes/uprobes/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/probes/uprobes/core.c b/arch/arm/probes/uprobes/core.c index c4b49b322e8a..f5f790c6e5f8 100644 --- a/arch/arm/probes/uprobes/core.c +++ b/arch/arm/probes/uprobes/core.c @@ -204,7 +204,7 @@ unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) static struct undef_hook uprobes_arm_break_hook = { .instr_mask = 0x0fffffff, .instr_val = (UPROBE_SWBP_ARM_INSN & 0x0fffffff), - .cpsr_mask = MODE_MASK, + .cpsr_mask = (PSR_T_BIT | MODE_MASK), .cpsr_val = USR_MODE, .fn = uprobe_trap_handler, }; @@ -212,7 +212,7 @@ static struct undef_hook uprobes_arm_break_hook = { static struct undef_hook uprobes_arm_ss_hook = { .instr_mask = 0x0fffffff, .instr_val = (UPROBE_SS_ARM_INSN & 0x0fffffff), - .cpsr_mask = MODE_MASK, + .cpsr_mask = (PSR_T_BIT | MODE_MASK), .cpsr_val = USR_MODE, .fn = uprobe_trap_handler, }; From bf05bf16c76bb44ab5156223e1e58e26dfe30a88 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 18 Apr 2021 14:45:32 -0700 Subject: [PATCH 150/182] Linux 5.12-rc8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4730cf156f6b..bc19584fee59 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Frozen Wasteland # *DOCUMENTATION* From 7412dee9f1fd3e224202b633fdfa6eeaebe0307e Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Fri, 16 Apr 2021 11:43:47 +0200 Subject: [PATCH 151/182] mmc: meson-gx: replace WARN_ONCE with dev_warn_once about scatterlist size alignment in block mode Since commit e085b51c74cc ("mmc: meson-gx: check for scatterlist size alignment in block mode"), support for SDIO SD_IO_RW_EXTENDED transferts are properly filtered but some driver like brcmfmac still gives a block sg buffer size not aligned with SDIO block, triggerring a WARN_ONCE() with scary stacktrace even if the transfer works fine but with possible degraded performances. Simply replace with dev_warn_once() to inform user this should be fixed to avoid degraded performance. This should be ultimately fixed in brcmfmac, but since it's only a performance issue the warning should be removed. Fixes: e085b51c74cc ("mmc: meson-gx: check for scatterlist size alignment in block mode") Reported-by: Marek Szyprowski Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20210416094347.2015896-1-narmstrong@baylibre.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/meson-gx-mmc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index eb6c02bc4a02..b8b771b643cc 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -247,8 +247,9 @@ static void meson_mmc_get_transfer_mode(struct mmc_host *mmc, */ for_each_sg(data->sg, sg, data->sg_len, i) { if (sg->length % data->blksz) { - WARN_ONCE(1, "unaligned sg len %u blksize %u\n", - sg->length, data->blksz); + dev_warn_once(mmc_dev(mmc), + "unaligned sg len %u blksize %u, disabling descriptor DMA for transfer\n", + sg->length, data->blksz); return; } } From 0c89d87d1d43d9fa268d1dc489518564d58bf497 Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Sat, 10 Apr 2021 15:35:23 +0800 Subject: [PATCH 152/182] preempt/dynamic: Fix typo in macro conditional statement Commit 40607ee97e4e ("preempt/dynamic: Provide irqentry_exit_cond_resched() static call") tried to provide irqentry_exit_cond_resched() static call in irqentry_exit, but has a typo in macro conditional statement. Fixes: 40607ee97e4e ("preempt/dynamic: Provide irqentry_exit_cond_resched() static call") Signed-off-by: Zhouyi Zhou Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210410073523.5493-1-zhouzhouyi@gmail.com --- kernel/entry/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 8442e5c9cfa2..2003d69bd6d5 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -422,7 +422,7 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state) instrumentation_begin(); if (IS_ENABLED(CONFIG_PREEMPTION)) { -#ifdef CONFIG_PREEMT_DYNAMIC +#ifdef CONFIG_PREEMPT_DYNAMIC static_call(irqentry_exit_cond_resched)(); #else irqentry_exit_cond_resched(); From d2b9935d65dab6e92beb33c150c1a6ded14ab670 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 13 Apr 2021 02:24:12 +0300 Subject: [PATCH 153/182] drm/i915: Fix modesetting in case of unexpected AUX timeouts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case AUX failures happen unexpectedly during a modeset, the driver should still complete the modeset. In particular the driver should perform the link training sequence steps even in case of an AUX failure, as this sequence also includes port initialization steps. Not doing that can leave the port/pipe in a broken state and lead for instance to a flip done timeout. Fix this by continuing with link training (in a no-LTTPR mode) if the DPRX DPCD readout failed for some reason at the beginning of link training. After a successful connector detection we already have the DPCD read out and cached, so the failed repeated read for it should not cause a problem. Note that a partial AUX read could in theory partly overwrite the cached DPCD (and return error) but this overwrite should not happen if the returned values are corrupted (due to a timeout or some other IO error). Kudos to Ville to root cause the problem. Fixes: 7dffbdedb96a ("drm/i915: Disable LTTPR support when the DPCD rev < 1.4") References: https://gitlab.freedesktop.org/drm/intel/-/issues/3308 Cc: stable@vger.kernel.org # 5.11 Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210412232413.2755054-1-imre.deak@intel.com (cherry picked from commit e42e7e585984b85b0fb9dd1fefc85ee4800ca629) Signed-off-by: Rodrigo Vivi [adjusted Fixes: tag] --- drivers/gpu/drm/i915/display/intel_dp_link_training.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index be6ac0dd846e..2ed309534e97 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -848,7 +848,8 @@ void intel_dp_start_link_train(struct intel_dp *intel_dp, int lttpr_count = intel_dp_init_lttpr_and_dprx_caps(intel_dp); if (lttpr_count < 0) - return; + /* Still continue with enabling the port and link training. */ + lttpr_count = 0; if (!intel_dp_link_train_all_phys(intel_dp, crtc_state, lttpr_count)) intel_dp_schedule_fallback_link_training(intel_dp, crtc_state); From 7af08140979a6e7e12b78c93b8625c8d25b084e2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 19 Apr 2021 15:08:49 -0700 Subject: [PATCH 154/182] Revert "gcov: clang: fix clang-11+ build" This reverts commit 04c53de57cb6435738961dace8b1b71d3ecd3c39. Nathan Chancellor points out that it should not have been merged into mainline by itself. It was a fix for "gcov: use kvmalloc()", which is still in -mm/-next. Merging it alone has broken the build. Link: https://github.com/ClangBuiltLinux/continuous-integration2/runs/2384465683?check_suite_focus=true Reported-by: Nathan Chancellor Cc: Johannes Berg Cc: Nick Desaulniers Cc: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/gcov/clang.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/gcov/clang.c b/kernel/gcov/clang.c index b81f2823630d..c466c7fbdece 100644 --- a/kernel/gcov/clang.c +++ b/kernel/gcov/clang.c @@ -369,7 +369,7 @@ static struct gcov_fn_info *gcov_fn_info_dup(struct gcov_fn_info *fn) INIT_LIST_HEAD(&fn_dup->head); cv_size = fn->num_counters * sizeof(fn->counters[0]); - fn_dup->counters = kvmalloc(cv_size, GFP_KERNEL); + fn_dup->counters = vmalloc(cv_size); if (!fn_dup->counters) { kfree(fn_dup); return NULL; From 0e1e71d34901a633825cd5ae78efaf8abd9215c6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 19 Apr 2021 14:23:12 -0400 Subject: [PATCH 155/182] tracing: Fix checking event hash pointer logic when tp_printk is enabled Pointers in events that are printed are unhashed if the flags allow it, and the logic to do so is called before processing the event output from the raw ring buffer. In most cases, this is done when a user reads one of the trace files. But if tp_printk is added on the kernel command line, this logic is done for trace events when they are triggered, and their output goes out via printk. The unhash logic (and even the validation of the output) did not support the tp_printk output, and would crash. Link: https://lore.kernel.org/linux-tegra/9835d9f1-8d3a-3440-c53f-516c2606ad07@nvidia.com/ Fixes: efbbdaa22bb7 ("tracing: Show real address for trace event arguments") Reported-by: Jon Hunter Tested-by: Jon Hunter Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5c777627212f..c0c9aa5cd8e2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3545,7 +3545,11 @@ static char *trace_iter_expand_format(struct trace_iterator *iter) { char *tmp; - if (iter->fmt == static_fmt_buf) + /* + * iter->tr is NULL when used with tp_printk, which makes + * this get called where it is not safe to call krealloc(). + */ + if (!iter->tr || iter->fmt == static_fmt_buf) return NULL; tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE, @@ -3566,7 +3570,7 @@ const char *trace_event_format(struct trace_iterator *iter, const char *fmt) if (WARN_ON_ONCE(!fmt)) return fmt; - if (iter->tr->trace_flags & TRACE_ITER_HASH_PTR) + if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR) return fmt; p = fmt; @@ -9692,7 +9696,7 @@ void __init early_trace_init(void) { if (tracepoint_printk) { tracepoint_print_iter = - kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL); + kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL); if (MEM_FAIL(!tracepoint_print_iter, "Failed to allocate trace iterator\n")) tracepoint_printk = 0; From 5849cdf8c120e3979c57d34be55b92d90a77a47e Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 16 Apr 2021 14:02:07 +0200 Subject: [PATCH 156/182] x86/crash: Fix crash_setup_memmap_entries() out-of-bounds access Commit in Fixes: added support for kexec-ing a kernel on panic using a new system call. As part of it, it does prepare a memory map for the new kernel. However, while doing so, it wrongly accesses memory it has not allocated: it accesses the first element of the cmem->ranges[] array in memmap_exclude_ranges() but it has not allocated the memory for it in crash_setup_memmap_entries(). As KASAN reports: BUG: KASAN: vmalloc-out-of-bounds in crash_setup_memmap_entries+0x17e/0x3a0 Write of size 8 at addr ffffc90000426008 by task kexec/1187 (gdb) list *crash_setup_memmap_entries+0x17e 0xffffffff8107cafe is in crash_setup_memmap_entries (arch/x86/kernel/crash.c:322). 317 unsigned long long mend) 318 { 319 unsigned long start, end; 320 321 cmem->ranges[0].start = mstart; 322 cmem->ranges[0].end = mend; 323 cmem->nr_ranges = 1; 324 325 /* Exclude elf header region */ 326 start = image->arch.elf_load_addr; (gdb) Make sure the ranges array becomes a single element allocated. [ bp: Write a proper commit message. ] Fixes: dd5f726076cc ("kexec: support for kexec on panic using new system call") Signed-off-by: Mike Galbraith Signed-off-by: Borislav Petkov Reviewed-by: Dave Young Cc: Link: https://lkml.kernel.org/r/725fa3dc1da2737f0f6188a1a9701bead257ea9d.camel@gmx.de --- arch/x86/kernel/crash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index a8f3af257e26..b1deacbeb266 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -337,7 +337,7 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params) struct crash_memmap_data cmd; struct crash_mem *cmem; - cmem = vzalloc(sizeof(struct crash_mem)); + cmem = vzalloc(struct_size(cmem, ranges, 1)); if (!cmem) return -ENOMEM; From f2211881e737cade55e0ee07cf6a26d91a35a6fe Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 15 Apr 2021 16:34:16 +0800 Subject: [PATCH 157/182] perf data: Fix error return code in perf_data__create_dir() Although 'ret' has been initialized to -1, but it will be reassigned by the "ret = open(...)" statement in the for loop. So that, the value of 'ret' is unknown when asprintf() failed. Reported-by: Hulk Robot Signed-off-by: Zhen Lei Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210415083417.3740-1-thunder.leizhen@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index f29af4fc3d09..8fca4779ae6a 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -35,7 +35,7 @@ void perf_data__close_dir(struct perf_data *data) int perf_data__create_dir(struct perf_data *data, int nr) { struct perf_data_file *files = NULL; - int i, ret = -1; + int i, ret; if (WARN_ON(!data->is_dir)) return -EINVAL; @@ -51,7 +51,8 @@ int perf_data__create_dir(struct perf_data *data, int nr) for (i = 0; i < nr; i++) { struct perf_data_file *file = &files[i]; - if (asprintf(&file->path, "%s/data.%d", data->path, i) < 0) + ret = asprintf(&file->path, "%s/data.%d", data->path, i); + if (ret < 0) goto out_err; ret = open(file->path, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR); From db2e718a47984b9d71ed890eb2ea36ecf150de18 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Tue, 20 Apr 2021 08:43:34 -0500 Subject: [PATCH 158/182] capabilities: require CAP_SETFCAP to map uid 0 cap_setfcap is required to create file capabilities. Since commit 8db6c34f1dbc ("Introduce v3 namespaced file capabilities"), a process running as uid 0 but without cap_setfcap is able to work around this as follows: unshare a new user namespace which maps parent uid 0 into the child namespace. While this task will not have new capabilities against the parent namespace, there is a loophole due to the way namespaced file capabilities are represented as xattrs. File capabilities valid in userns 1 are distinguished from file capabilities valid in userns 2 by the kuid which underlies uid 0. Therefore the restricted root process can unshare a new self-mapping namespace, add a namespaced file capability onto a file, then use that file capability in the parent namespace. To prevent that, do not allow mapping parent uid 0 if the process which opened the uid_map file does not have CAP_SETFCAP, which is the capability for setting file capabilities. As a further wrinkle: a task can unshare its user namespace, then open its uid_map file itself, and map (only) its own uid. In this case we do not have the credential from before unshare, which was potentially more restricted. So, when creating a user namespace, we record whether the creator had CAP_SETFCAP. Then we can use that during map_write(). With this patch: 1. Unprivileged user can still unshare -Ur ubuntu@caps:~$ unshare -Ur root@caps:~# logout 2. Root user can still unshare -Ur ubuntu@caps:~$ sudo bash root@caps:/home/ubuntu# unshare -Ur root@caps:/home/ubuntu# logout 3. Root user without CAP_SETFCAP cannot unshare -Ur: root@caps:/home/ubuntu# /sbin/capsh --drop=cap_setfcap -- root@caps:/home/ubuntu# /sbin/setcap cap_setfcap=p /sbin/setcap unable to set CAP_SETFCAP effective capability: Operation not permitted root@caps:/home/ubuntu# unshare -Ur unshare: write failed /proc/self/uid_map: Operation not permitted Note: an alternative solution would be to allow uid 0 mappings by processes without CAP_SETFCAP, but to prevent such a namespace from writing any file capabilities. This approach can be seen at [1]. Background history: commit 95ebabde382 ("capabilities: Don't allow writing ambiguous v3 file capabilities") tried to fix the issue by preventing v3 fscaps to be written to disk when the root uid would map to the same uid in nested user namespaces. This led to regressions for various workloads. For example, see [2]. Ultimately this is a valid use-case we have to support meaning we had to revert this change in 3b0c2d3eaa83 ("Revert 95ebabde382c ("capabilities: Don't allow writing ambiguous v3 file capabilities")"). Link: https://git.kernel.org/pub/scm/linux/kernel/git/sergeh/linux.git/log/?h=2021-04-15/setfcap-nsfscaps-v4 [1] Link: https://github.com/containers/buildah/issues/3071 [2] Signed-off-by: Serge Hallyn Reviewed-by: Andrew G. Morgan Tested-by: Christian Brauner Reviewed-by: Christian Brauner Tested-by: Giuseppe Scrivano Cc: Eric Biederman Signed-off-by: Linus Torvalds --- include/linux/user_namespace.h | 3 ++ include/uapi/linux/capability.h | 3 +- kernel/user_namespace.c | 65 +++++++++++++++++++++++++++++++-- 3 files changed, 67 insertions(+), 4 deletions(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 64cf8ebdc4ec..f6c5f784be5a 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -63,6 +63,9 @@ struct user_namespace { kgid_t group; struct ns_common ns; unsigned long flags; + /* parent_could_setfcap: true if the creator if this ns had CAP_SETFCAP + * in its effective capability set at the child ns creation time. */ + bool parent_could_setfcap; #ifdef CONFIG_KEYS /* List of joinable keyrings in this namespace. Modification access of diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index c6ca33034147..2ddb4226cd23 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -335,7 +335,8 @@ struct vfs_ns_cap_data { #define CAP_AUDIT_CONTROL 30 -/* Set or remove capabilities on files */ +/* Set or remove capabilities on files. + Map uid=0 into a child user namespace. */ #define CAP_SETFCAP 31 diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index af612945a4d0..9a4b980d695b 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -106,6 +106,7 @@ int create_user_ns(struct cred *new) if (!ns) goto fail_dec; + ns->parent_could_setfcap = cap_raised(new->cap_effective, CAP_SETFCAP); ret = ns_alloc_inum(&ns->ns); if (ret) goto fail_free; @@ -841,6 +842,60 @@ static int sort_idmaps(struct uid_gid_map *map) return 0; } +/** + * verify_root_map() - check the uid 0 mapping + * @file: idmapping file + * @map_ns: user namespace of the target process + * @new_map: requested idmap + * + * If a process requests mapping parent uid 0 into the new ns, verify that the + * process writing the map had the CAP_SETFCAP capability as the target process + * will be able to write fscaps that are valid in ancestor user namespaces. + * + * Return: true if the mapping is allowed, false if not. + */ +static bool verify_root_map(const struct file *file, + struct user_namespace *map_ns, + struct uid_gid_map *new_map) +{ + int idx; + const struct user_namespace *file_ns = file->f_cred->user_ns; + struct uid_gid_extent *extent0 = NULL; + + for (idx = 0; idx < new_map->nr_extents; idx++) { + if (new_map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) + extent0 = &new_map->extent[idx]; + else + extent0 = &new_map->forward[idx]; + if (extent0->lower_first == 0) + break; + + extent0 = NULL; + } + + if (!extent0) + return true; + + if (map_ns == file_ns) { + /* The process unshared its ns and is writing to its own + * /proc/self/uid_map. User already has full capabilites in + * the new namespace. Verify that the parent had CAP_SETFCAP + * when it unshared. + * */ + if (!file_ns->parent_could_setfcap) + return false; + } else { + /* Process p1 is writing to uid_map of p2, who is in a child + * user namespace to p1's. Verify that the opener of the map + * file has CAP_SETFCAP against the parent of the new map + * namespace */ + if (!file_ns_capable(file, map_ns->parent, CAP_SETFCAP)) + return false; + } + + return true; +} + static ssize_t map_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos, int cap_setid, @@ -848,7 +903,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, struct uid_gid_map *parent_map) { struct seq_file *seq = file->private_data; - struct user_namespace *ns = seq->private; + struct user_namespace *map_ns = seq->private; struct uid_gid_map new_map; unsigned idx; struct uid_gid_extent extent; @@ -895,7 +950,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, /* * Adjusting namespace settings requires capabilities on the target. */ - if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN)) + if (cap_valid(cap_setid) && !file_ns_capable(file, map_ns, CAP_SYS_ADMIN)) goto out; /* Parse the user data */ @@ -965,7 +1020,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, ret = -EPERM; /* Validate the user is allowed to use user id's mapped to. */ - if (!new_idmap_permitted(file, ns, cap_setid, &new_map)) + if (!new_idmap_permitted(file, map_ns, cap_setid, &new_map)) goto out; ret = -EPERM; @@ -1086,6 +1141,10 @@ static bool new_idmap_permitted(const struct file *file, struct uid_gid_map *new_map) { const struct cred *cred = file->f_cred; + + if (cap_setid == CAP_SETUID && !verify_root_map(file, ns, new_map)) + return false; + /* Don't allow mappings that would allow anything that wouldn't * be allowed without the establishment of unprivileged mappings. */ From 9d480158ee86ad606d3a8baaf81e6b71acbfd7d5 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 15 Apr 2021 14:22:43 -0700 Subject: [PATCH 159/182] perf/x86/intel/uncore: Remove uncore extra PCI dev HSWEP_PCI_PCU_3 There may be a kernel panic on the Haswell server and the Broadwell server, if the snbep_pci2phy_map_init() return error. The uncore_extra_pci_dev[HSWEP_PCI_PCU_3] is used in the cpu_init() to detect the existence of the SBOX, which is a MSR type of PMON unit. The uncore_extra_pci_dev is allocated in the uncore_pci_init(). If the snbep_pci2phy_map_init() returns error, perf doesn't initialize the PCI type of the PMON units, so the uncore_extra_pci_dev will not be allocated. But perf may continue initializing the MSR type of PMON units. A null dereference kernel panic will be triggered. The sockets in a Haswell server or a Broadwell server are identical. Only need to detect the existence of the SBOX once. Current perf probes all available PCU devices and stores them into the uncore_extra_pci_dev. It's unnecessary. Use the pci_get_device() to replace the uncore_extra_pci_dev. Only detect the existence of the SBOX on the first available PCU device once. Factor out hswep_has_limit_sbox(), since the Haswell server and the Broadwell server uses the same way to detect the existence of the SBOX. Add some macros to replace the magic number. Fixes: 5306c31c5733 ("perf/x86/uncore/hsw-ep: Handle systems with only two SBOXes") Reported-by: Steve Wahl Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Steve Wahl Link: https://lkml.kernel.org/r/1618521764-100923-1-git-send-email-kan.liang@linux.intel.com --- arch/x86/events/intel/uncore_snbep.c | 59 ++++++++++++---------------- 1 file changed, 25 insertions(+), 34 deletions(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index b79951d0707c..9b8937631838 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1159,7 +1159,6 @@ enum { SNBEP_PCI_QPI_PORT0_FILTER, SNBEP_PCI_QPI_PORT1_FILTER, BDX_PCI_QPI_PORT2_FILTER, - HSWEP_PCI_PCU_3, }; static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event) @@ -2857,22 +2856,33 @@ static struct intel_uncore_type *hswep_msr_uncores[] = { NULL, }; +#define HSWEP_PCU_DID 0x2fc0 +#define HSWEP_PCU_CAPID4_OFFET 0x94 +#define hswep_get_chop(_cap) (((_cap) >> 6) & 0x3) + +static bool hswep_has_limit_sbox(unsigned int device) +{ + struct pci_dev *dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, NULL); + u32 capid4; + + if (!dev) + return false; + + pci_read_config_dword(dev, HSWEP_PCU_CAPID4_OFFET, &capid4); + if (!hswep_get_chop(capid4)) + return true; + + return false; +} + void hswep_uncore_cpu_init(void) { - int pkg = boot_cpu_data.logical_proc_id; - if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; /* Detect 6-8 core systems with only two SBOXes */ - if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) { - u32 capid4; - - pci_read_config_dword(uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3], - 0x94, &capid4); - if (((capid4 >> 6) & 0x3) == 0) - hswep_uncore_sbox.num_boxes = 2; - } + if (hswep_has_limit_sbox(HSWEP_PCU_DID)) + hswep_uncore_sbox.num_boxes = 2; uncore_msr_uncores = hswep_msr_uncores; } @@ -3135,11 +3145,6 @@ static const struct pci_device_id hswep_uncore_pci_ids[] = { .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, SNBEP_PCI_QPI_PORT1_FILTER), }, - { /* PCU.3 (for Capability registers) */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fc0), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, - HSWEP_PCI_PCU_3), - }, { /* end: all zeroes */ } }; @@ -3231,27 +3236,18 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = { EVENT_CONSTRAINT_END }; +#define BDX_PCU_DID 0x6fc0 + void bdx_uncore_cpu_init(void) { - int pkg = topology_phys_to_logical_pkg(boot_cpu_data.phys_proc_id); - if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; uncore_msr_uncores = bdx_msr_uncores; - /* BDX-DE doesn't have SBOX */ - if (boot_cpu_data.x86_model == 86) { - uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; /* Detect systems with no SBOXes */ - } else if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) { - struct pci_dev *pdev; - u32 capid4; + if ((boot_cpu_data.x86_model == 86) || hswep_has_limit_sbox(BDX_PCU_DID)) + uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; - pdev = uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]; - pci_read_config_dword(pdev, 0x94, &capid4); - if (((capid4 >> 6) & 0x3) == 0) - bdx_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; - } hswep_uncore_pcu.constraints = bdx_uncore_pcu_constraints; } @@ -3472,11 +3468,6 @@ static const struct pci_device_id bdx_uncore_pci_ids[] = { .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, BDX_PCI_QPI_PORT2_FILTER), }, - { /* PCU.3 (for Capability registers) */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fc0), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, - HSWEP_PCI_PCU_3), - }, { /* end: all zeroes */ } }; From ddd8d94ca31e768c76cf8bfe34ba7b10136b3694 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sat, 17 Apr 2021 11:38:39 +0300 Subject: [PATCH 160/182] gpio: omap: Save and restore sysconfig As we are using cpu_pm to save and restore context, we must also save and restore the GPIO sysconfig register. This is needed because we are not calling PM runtime functions at all with cpu_pm. We need to save the sysconfig on idle as it's value can get reconfigured by PM runtime and can be different from the init time value. Device specific flags like "ti,no-idle-on-init" can affect the init value. Fixes: b764a5863fd8 ("gpio: omap: Remove custom PM calls and use cpu_pm instead") Cc: Aaro Koskinen Cc: Adam Ford Cc: Andreas Kemnade Cc: Grygorii Strashko Cc: Peter Ujfalusi Signed-off-by: Tony Lindgren Acked-by: Grygorii Strashko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-omap.c | 9 +++++++++ include/linux/platform_data/gpio-omap.h | 3 +++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 41952bb818ad..56152263ab38 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -29,6 +29,7 @@ #define OMAP4_GPIO_DEBOUNCINGTIME_MASK 0xFF struct gpio_regs { + u32 sysconfig; u32 irqenable1; u32 irqenable2; u32 wake_en; @@ -1069,6 +1070,7 @@ static void omap_gpio_init_context(struct gpio_bank *p) const struct omap_gpio_reg_offs *regs = p->regs; void __iomem *base = p->base; + p->context.sysconfig = readl_relaxed(base + regs->sysconfig); p->context.ctrl = readl_relaxed(base + regs->ctrl); p->context.oe = readl_relaxed(base + regs->direction); p->context.wake_en = readl_relaxed(base + regs->wkup_en); @@ -1088,6 +1090,7 @@ static void omap_gpio_restore_context(struct gpio_bank *bank) const struct omap_gpio_reg_offs *regs = bank->regs; void __iomem *base = bank->base; + writel_relaxed(bank->context.sysconfig, base + regs->sysconfig); writel_relaxed(bank->context.wake_en, base + regs->wkup_en); writel_relaxed(bank->context.ctrl, base + regs->ctrl); writel_relaxed(bank->context.leveldetect0, base + regs->leveldetect0); @@ -1115,6 +1118,10 @@ static void omap_gpio_idle(struct gpio_bank *bank, bool may_lose_context) bank->saved_datain = readl_relaxed(base + bank->regs->datain); + /* Save syconfig, it's runtime value can be different from init value */ + if (bank->loses_context) + bank->context.sysconfig = readl_relaxed(base + bank->regs->sysconfig); + if (!bank->enabled_non_wakeup_gpios) goto update_gpio_context_count; @@ -1279,6 +1286,7 @@ static int gpio_omap_cpu_notifier(struct notifier_block *nb, static const struct omap_gpio_reg_offs omap2_gpio_regs = { .revision = OMAP24XX_GPIO_REVISION, + .sysconfig = OMAP24XX_GPIO_SYSCONFIG, .direction = OMAP24XX_GPIO_OE, .datain = OMAP24XX_GPIO_DATAIN, .dataout = OMAP24XX_GPIO_DATAOUT, @@ -1302,6 +1310,7 @@ static const struct omap_gpio_reg_offs omap2_gpio_regs = { static const struct omap_gpio_reg_offs omap4_gpio_regs = { .revision = OMAP4_GPIO_REVISION, + .sysconfig = OMAP4_GPIO_SYSCONFIG, .direction = OMAP4_GPIO_OE, .datain = OMAP4_GPIO_DATAIN, .dataout = OMAP4_GPIO_DATAOUT, diff --git a/include/linux/platform_data/gpio-omap.h b/include/linux/platform_data/gpio-omap.h index 8b30b14b47d3..f377817ce75c 100644 --- a/include/linux/platform_data/gpio-omap.h +++ b/include/linux/platform_data/gpio-omap.h @@ -85,6 +85,7 @@ * omap2+ specific GPIO registers */ #define OMAP24XX_GPIO_REVISION 0x0000 +#define OMAP24XX_GPIO_SYSCONFIG 0x0010 #define OMAP24XX_GPIO_IRQSTATUS1 0x0018 #define OMAP24XX_GPIO_IRQSTATUS2 0x0028 #define OMAP24XX_GPIO_IRQENABLE2 0x002c @@ -108,6 +109,7 @@ #define OMAP24XX_GPIO_SETDATAOUT 0x0094 #define OMAP4_GPIO_REVISION 0x0000 +#define OMAP4_GPIO_SYSCONFIG 0x0010 #define OMAP4_GPIO_EOI 0x0020 #define OMAP4_GPIO_IRQSTATUSRAW0 0x0024 #define OMAP4_GPIO_IRQSTATUSRAW1 0x0028 @@ -148,6 +150,7 @@ #ifndef __ASSEMBLER__ struct omap_gpio_reg_offs { u16 revision; + u16 sysconfig; u16 direction; u16 datain; u16 dataout; From d42a5b639d15622ece5b9dd12dafd9776efa2593 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 1 Apr 2021 00:22:23 -0400 Subject: [PATCH 161/182] drm/amdgpu: reserve fence slot to update page table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Forgot to reserve a fence slot to use sdma to update page table, cause below kernel BUG backtrace to handle vm retry fault while application is exiting. [ 133.048143] kernel BUG at /home/yangp/git/compute_staging/kernel/drivers/dma-buf/dma-resv.c:281! [ 133.048487] Workqueue: events amdgpu_irq_handle_ih1 [amdgpu] [ 133.048506] RIP: 0010:dma_resv_add_shared_fence+0x204/0x280 [ 133.048672] amdgpu_vm_sdma_commit+0x134/0x220 [amdgpu] [ 133.048788] amdgpu_vm_bo_update_range+0x220/0x250 [amdgpu] [ 133.048905] amdgpu_vm_handle_fault+0x202/0x370 [amdgpu] [ 133.049031] gmc_v9_0_process_interrupt+0x1ab/0x310 [amdgpu] [ 133.049165] ? kgd2kfd_interrupt+0x9a/0x180 [amdgpu] [ 133.049289] ? amdgpu_irq_dispatch+0xb6/0x240 [amdgpu] [ 133.049408] amdgpu_irq_dispatch+0xb6/0x240 [amdgpu] [ 133.049534] amdgpu_ih_process+0x9b/0x1c0 [amdgpu] [ 133.049657] amdgpu_irq_handle_ih1+0x21/0x60 [amdgpu] [ 133.049669] process_one_work+0x29f/0x640 [ 133.049678] worker_thread+0x39/0x3f0 [ 133.049685] ? process_one_work+0x640/0x640 Signed-off-by: Philip Yang Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.11.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 7d2c8b169827..326dae31b675 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -3300,7 +3300,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, struct amdgpu_bo *root; uint64_t value, flags; struct amdgpu_vm *vm; - long r; + int r; spin_lock(&adev->vm_manager.pasid_lock); vm = idr_find(&adev->vm_manager.pasid_idr, pasid); @@ -3349,6 +3349,12 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, value = 0; } + r = dma_resv_reserve_shared(root->tbo.base.resv, 1); + if (r) { + pr_debug("failed %d to reserve fence slot\n", r); + goto error_unlock; + } + r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr, addr, flags, value, NULL, NULL, NULL); @@ -3360,7 +3366,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, error_unlock: amdgpu_bo_unreserve(root); if (r < 0) - DRM_ERROR("Can't handle page fault (%ld)\n", r); + DRM_ERROR("Can't handle page fault (%d)\n", r); error_unref: amdgpu_bo_unref(&root); From 6d638b3ffd27036c062d32cb4efd4be172c2a65e Mon Sep 17 00:00:00 2001 From: Qingqing Zhuo Date: Wed, 14 Apr 2021 19:00:01 -0400 Subject: [PATCH 162/182] drm/amd/display: Update modifier list for gfx10_3 [Why] Current list supports modifiers that have DCC_MAX_COMPRESSED_BLOCK set to AMD_FMT_MOD_DCC_BLOCK_128B, while AMD_FMT_MOD_DCC_BLOCK_64B is used instead by userspace. [How] Replace AMD_FMT_MOD_DCC_BLOCK_128B with AMD_FMT_MOD_DCC_BLOCK_64B for modifiers with DCC supported. Fixes: faa37f54ce0462 ("drm/amd/display: Expose modifiers") Signed-off-by: Qingqing Zhuo Reviewed-by: Bas Nieuwenhuizen Tested-by: Bas Nieuwenhuizen Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 573cf17262da..57e5900059ed 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4296,7 +4296,7 @@ add_gfx10_3_modifiers(const struct amdgpu_device *adev, AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B)); + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); add_modifier(mods, size, capacity, AMD_FMT_MOD | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | @@ -4308,7 +4308,7 @@ add_gfx10_3_modifiers(const struct amdgpu_device *adev, AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B)); + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); add_modifier(mods, size, capacity, AMD_FMT_MOD | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | From 9ebb6bc0125dfb1e65a53eea4aeecc63d4d6ec2d Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Fri, 26 Mar 2021 17:59:44 +0100 Subject: [PATCH 163/182] amd/display: allow non-linear multi-planar formats Accept non-linear buffers which use a multi-planar format, as long as they don't use DCC. Tested on GFX9 with NV12. Signed-off-by: Simon Ser Cc: Alex Deucher Cc: Harry Wentland Cc: Nicholas Kazlauskas Cc: Bas Nieuwenhuizen Reviewed-by: Bas Nieuwenhuizen Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 57e5900059ed..d699a5cf6c11 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4071,13 +4071,6 @@ static bool dm_plane_format_mod_supported(struct drm_plane *plane, if (modifier == DRM_FORMAT_MOD_LINEAR) return true; - /* - * The arbitrary tiling support for multiplane formats has not been hooked - * up. - */ - if (info->num_planes > 1) - return false; - /* * For D swizzle the canonical modifier depends on the bpp, so check * it here. @@ -4096,6 +4089,10 @@ static bool dm_plane_format_mod_supported(struct drm_plane *plane, /* Per radeonsi comments 16/64 bpp are more complicated. */ if (info->cpp[0] != 4) return false; + /* We support multi-planar formats, but not when combined with + * additional DCC metadata planes. */ + if (info->num_planes > 1) + return false; } return true; From 24d034528ef06ad94cfcf4394beac0443ab1b16d Mon Sep 17 00:00:00 2001 From: Jiansong Chen Date: Mon, 19 Apr 2021 16:33:22 +0800 Subject: [PATCH 164/182] drm/amdgpu: fix GCR_GENERAL_CNTL offset for dimgrey_cavefish dimgrey_cavefish has similar gc_10_3 ip with sienna_cichlid, so follow its registers offset setting. Signed-off-by: Jiansong Chen Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 45d1172b7bff..63691deb7df3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3280,7 +3280,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_4[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000280, 0x00000280), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0x07800000, 0x00800000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x00001d00, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x00001d00, 0x00000500), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003c0000, 0x00280400), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), From 68e6582e8f2dc32fd2458b9926564faa1fb4560e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Apr 2021 18:05:02 +0200 Subject: [PATCH 165/182] block: return -EBUSY when there are open partitions in blkdev_reread_part The switch to go through blkdev_get_by_dev means we now ignore the return value from bdev_disk_changed in __blkdev_get. Add a manual check to restore the old semantics. Fixes: 4601b4b130de ("block: reopen the device in blkdev_reread_part") Reported-by: Karel Zak Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210421160502.447418-1-hch@lst.de Signed-off-by: Jens Axboe --- block/ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/ioctl.c b/block/ioctl.c index ff241e663c01..8ba1ed8defd0 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -89,6 +89,8 @@ static int blkdev_reread_part(struct block_device *bdev, fmode_t mode) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; + if (bdev->bd_part_count) + return -EBUSY; /* * Reopen the device to revalidate the driver state and force a From 9d5171eab462a63e2fbebfccf6026e92be018f20 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 21 Apr 2021 15:42:47 -0700 Subject: [PATCH 166/182] KEYS: trusted: Fix TPM reservation for seal/unseal The original patch 8c657a0590de ("KEYS: trusted: Reserve TPM for seal and unseal operations") was correct on the mailing list: https://lore.kernel.org/linux-integrity/20210128235621.127925-4-jarkko@kernel.org/ But somehow got rebased so that the tpm_try_get_ops() in tpm2_seal_trusted() got lost. This causes an imbalanced put of the TPM ops and causes oopses on TIS based hardware. This fix puts back the lost tpm_try_get_ops() Fixes: 8c657a0590de ("KEYS: trusted: Reserve TPM for seal and unseal operations") Reported-by: Mimi Zohar Acked-by: Mimi Zohar Signed-off-by: James Bottomley --- security/keys/trusted-keys/trusted_tpm2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c index e2a0ed5d02f0..c87c4df8703d 100644 --- a/security/keys/trusted-keys/trusted_tpm2.c +++ b/security/keys/trusted-keys/trusted_tpm2.c @@ -79,7 +79,7 @@ int tpm2_seal_trusted(struct tpm_chip *chip, if (i == ARRAY_SIZE(tpm2_hash_map)) return -EINVAL; - rc = tpm_buf_init(&buf, TPM2_ST_SESSIONS, TPM2_CC_CREATE); + rc = tpm_try_get_ops(chip); if (rc) return rc; From 482715ff0601c836152b792f06c353464d826b9b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 15 Apr 2021 16:03:56 +0300 Subject: [PATCH 167/182] pinctrl: core: Show pin numbers for the controllers with base = 0 The commit f1b206cf7c57 ("pinctrl: core: print gpio in pins debugfs file") enabled GPIO pin number and label in debugfs for pin controller. However, it limited that feature to the chips where base is positive number. This, in particular, excluded chips where base is 0 for the historical or backward compatibility reasons. Refactor the code to include the latter as well. Fixes: f1b206cf7c57 ("pinctrl: core: print gpio in pins debugfs file") Cc: Drew Fustini Signed-off-by: Andy Shevchenko Tested-by: Drew Fustini Reviewed-by: Drew Fustini Link: https://lore.kernel.org/r/20210415130356.15885-1-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/pinctrl/core.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 7d3370289938..6e6825d17a1d 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -1604,8 +1604,8 @@ static int pinctrl_pins_show(struct seq_file *s, void *what) unsigned i, pin; #ifdef CONFIG_GPIOLIB struct pinctrl_gpio_range *range; - unsigned int gpio_num; struct gpio_chip *chip; + int gpio_num; #endif seq_printf(s, "registered pins: %d\n", pctldev->desc->npins); @@ -1625,7 +1625,7 @@ static int pinctrl_pins_show(struct seq_file *s, void *what) seq_printf(s, "pin %d (%s) ", pin, desc->name); #ifdef CONFIG_GPIOLIB - gpio_num = 0; + gpio_num = -1; list_for_each_entry(range, &pctldev->gpio_ranges, node) { if ((pin >= range->pin_base) && (pin < (range->pin_base + range->npins))) { @@ -1633,10 +1633,12 @@ static int pinctrl_pins_show(struct seq_file *s, void *what) break; } } - chip = gpio_to_chip(gpio_num); - if (chip && chip->gpiodev && chip->gpiodev->base) - seq_printf(s, "%u:%s ", gpio_num - - chip->gpiodev->base, chip->label); + if (gpio_num >= 0) + chip = gpio_to_chip(gpio_num); + else + chip = NULL; + if (chip) + seq_printf(s, "%u:%s ", gpio_num - chip->gpiodev->base, chip->label); else seq_puts(s, "0:? "); #endif From 4d09ccc4a81e7de6b002482af554d8b5626f5041 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Wed, 14 Apr 2021 11:47:40 +0100 Subject: [PATCH 168/182] arm64: dts: allwinner: Revert SD card CD GPIO for Pine64-LTS Commit 941432d00768 ("arm64: dts: allwinner: Drop non-removable from SoPine/LTS SD card") enabled the card detect GPIO for the SOPine module, along the way with the Pine64-LTS, which share the same base .dtsi. This was based on the observation that the Pine64-LTS has as "push-push" SD card socket, and that the schematic mentions the card detect GPIO. After having received two reports about failing SD card access with that patch, some more research and polls on that subject revealed that there are at least two different versions of the Pine64-LTS out there: - On some boards (including mine) the card detect pin is "stuck" at high, regardless of an microSD card being inserted or not. - On other boards the card-detect is working, but is active-high, by virtue of an explicit inverter circuit, as shown in the schematic. To cover all versions of the board out there, and don't take any chances, let's revert the introduction of the active-low CD GPIO, but let's use the broken-cd property for the Pine64-LTS this time. That should avoid regressions and should work for everyone, even allowing SD card changes now. The SOPine card detect has proven to be working, so let's keep that GPIO in place. Fixes: 941432d00768 ("arm64: dts: allwinner: Drop non-removable from SoPine/LTS SD card") Reported-by: Michael Weiser Reported-by: Daniel Kulesz Suggested-by: Chen-Yu Tsai Signed-off-by: Andre Przywara Tested-by: Michael Weiser Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20210414104740.31497-1-andre.przywara@arm.com --- arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts index e79ce49e7e6a..596a25907432 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts @@ -21,5 +21,5 @@ led { }; &mmc0 { - cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>; /* PF6 push-push switch */ + broken-cd; /* card detect is broken on *some* boards */ }; From 4b2f1e59229b9da319d358828cdfa4ddbc140769 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 21 Apr 2021 17:18:34 -0700 Subject: [PATCH 169/182] perf/x86/kvm: Fix Broadwell Xeon stepping in isolation_ucodes[] The only stepping of Broadwell Xeon parts is stepping 1. Fix the relevant isolation_ucodes[] entry, which previously enumerated stepping 2. Although the original commit was characterized as an optimization, it is also a workaround for a correctness issue. If a PMI arrives between kvm's call to perf_guest_get_msrs() and the subsequent VM-entry, a stale value for the IA32_PEBS_ENABLE MSR may be restored at the next VM-exit. This is because, unbeknownst to kvm, PMI throttling may clear bits in the IA32_PEBS_ENABLE MSR. CPUs with "PEBS isolation" don't suffer from this issue, because perf_guest_get_msrs() doesn't report the IA32_PEBS_ENABLE value. Fixes: 9b545c04abd4f ("perf/x86/kvm: Avoid unnecessary work in guest filtering") Signed-off-by: Jim Mattson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Peter Shier Acked-by: Andi Kleen Link: https://lkml.kernel.org/r/20210422001834.1748319-1-jmattson@google.com --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 37ce38403cb8..c57ec8e27907 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4516,7 +4516,7 @@ static const struct x86_cpu_desc isolation_ucodes[] = { INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 3, 0x07000009), INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 4, 0x0f000009), INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 5, 0x0e000002), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 2, 0x0b000014), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 1, 0x0b000014), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 3, 0x00000021), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 4, 0x00000000), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 5, 0x00000000), From a9d064524fc3cf463b3bb14fa63de78aafb40dab Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Mon, 12 Apr 2021 17:55:12 +0800 Subject: [PATCH 170/182] vhost-vdpa: protect concurrent access to vhost device iotlb Protect vhost device iotlb by vhost_dev->mutex. Otherwise, it might cause corruption of the list and interval tree in struct vhost_iotlb if userspace sends the VHOST_IOTLB_MSG_V2 message concurrently. Fixes: 4c8cf318("vhost: introduce vDPA-based backend") Cc: stable@vger.kernel.org Signed-off-by: Xie Yongji Acked-by: Jason Wang Reviewed-by: Stefano Garzarella Link: https://lore.kernel.org/r/20210412095512.178-1-xieyongji@bytedance.com Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index e0a27e336293..bfa4c6ef554e 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -745,9 +745,11 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, const struct vdpa_config_ops *ops = vdpa->config; int r = 0; + mutex_lock(&dev->mutex); + r = vhost_dev_check_owner(dev); if (r) - return r; + goto unlock; switch (msg->type) { case VHOST_IOTLB_UPDATE: @@ -768,6 +770,8 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, r = -EINVAL; break; } +unlock: + mutex_unlock(&dev->mutex); return r; } From be286f84e33da1a7f83142b64dbd86f600e73363 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 11 Apr 2021 11:36:46 +0300 Subject: [PATCH 171/182] vdpa/mlx5: Set err = -ENOMEM in case dma_map_sg_attrs fails Set err = -ENOMEM if dma_map_sg_attrs() fails so the function reutrns error. Fixes: 94abbccdf291 ("vdpa/mlx5: Add shared memory registration code") Signed-off-by: Eli Cohen Reported-by: kernel test robot Reported-by: Dan Carpenter Link: https://lore.kernel.org/r/20210411083646.910546-1-elic@nvidia.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Stefano Garzarella --- drivers/vdpa/mlx5/core/mr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index 3908ff28eec0..800cfd1967ad 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -278,8 +278,10 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr mr->log_size = log_entity_size; mr->nsg = nsg; mr->nent = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); - if (!mr->nent) + if (!mr->nent) { + err = -ENOMEM; goto err_map; + } err = create_direct_mr(mvdev, mr); if (err) From b14585d9f18dc617e975815570fe836be656b1da Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 20 Apr 2021 23:15:53 +0800 Subject: [PATCH 172/182] perf auxtrace: Fix potential NULL pointer dereference In the function auxtrace_parse_snapshot_options(), the callback pointer "itr->parse_snapshot_options" can be NULL if it has not been set during the AUX record initialization. This can cause tool crashing if the callback pointer "itr->parse_snapshot_options" is dereferenced without performing NULL check. Add a NULL check for the pointer "itr->parse_snapshot_options" before invoke the callback. Fixes: d20031bb63dd6dde ("perf tools: Add AUX area tracing Snapshot Mode") Signed-off-by: Leo Yan Acked-by: Adrian Hunter Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Tiezhu Yang Link: http://lore.kernel.org/lkml/20210420151554.2031768-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/auxtrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 5b6ccb90b397..1b4091a3b508 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -634,7 +634,7 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr, break; } - if (itr) + if (itr && itr->parse_snapshot_options) return itr->parse_snapshot_options(itr, opts, str); pr_err("No AUX area tracing to snapshot\n"); From 671b60cb6a897a5b3832fe57657152f2c3995e25 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 21 Apr 2021 14:04:00 +0200 Subject: [PATCH 173/182] perf ftrace: Fix access to pid in array when setting a pid filter Command 'perf ftrace -v -- ls' fails in s390 (at least 5.12.0rc6). The root cause is a missing pointer dereference which causes an array element address to be used as PID. Fix this by extracting the PID. Output before: # ./perf ftrace -v -- ls function_graph tracer is used write '-263732416' to tracing/set_ftrace_pid failed: Invalid argument failed to set ftrace pid # Output after: ./perf ftrace -v -- ls function_graph tracer is used # tracer: function_graph # # CPU DURATION FUNCTION CALLS # | | | | | | | 4) | rcu_read_lock_sched_held() { 4) 0.552 us | rcu_lockdep_current_cpu_online(); 4) 6.124 us | } Reported-by: Alexander Schmidt Signed-off-by: Thomas Richter Acked-by: Namhyung Kim Cc: Heiko Carstens Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Vasily Gorbik Link: http://lore.kernel.org/lkml/20210421120400.2126433-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index d49448a1060c..87cb11a7a3ee 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -289,7 +289,7 @@ static int set_tracing_pid(struct perf_ftrace *ftrace) for (i = 0; i < perf_thread_map__nr(ftrace->evlist->core.threads); i++) { scnprintf(buf, sizeof(buf), "%d", - ftrace->evlist->core.threads->map[i]); + perf_thread_map__pid(ftrace->evlist->core.threads, i)); if (append_tracing_file("set_ftrace_pid", buf) < 0) return -1; } From c6f87141254d16e281e4b4431af7316895207b8f Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 15 Apr 2021 17:27:44 +0800 Subject: [PATCH 174/182] perf map: Fix error return code in maps__clone() Although 'err' has been initialized to -ENOMEM, but it will be reassigned by the "err = unwind__prepare_access(...)" statement in the for loop. So that, the value of 'err' is unknown when map__clone() failed. Fixes: 6c502584438bda63 ("perf unwind: Call unwind__prepare_access for forked thread") Reported-by: Hulk Robot Signed-off-by: Zhen Lei Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: zhen lei Link: http://lore.kernel.org/lkml/20210415092744.3793-1-thunder.leizhen@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index fbc40a2c17d4..8af693d9678c 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -840,15 +840,18 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) int maps__clone(struct thread *thread, struct maps *parent) { struct maps *maps = thread->maps; - int err = -ENOMEM; + int err; struct map *map; down_read(&parent->lock); maps__for_each_entry(parent, map) { struct map *new = map__clone(map); - if (new == NULL) + + if (new == NULL) { + err = -ENOMEM; goto out_unlock; + } err = unwind__prepare_access(maps, new, NULL); if (err) From 9c1a07442c95f6e64dc8de099e9f35ea73db7852 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 23 Apr 2021 16:23:20 +0800 Subject: [PATCH 175/182] KVM: x86/xen: Take srcu lock when accessing kvm_memslots() kvm_memslots() will be called by kvm_write_guest_offset_cached() so we should take the srcu lock. Let's pull the srcu lock operation from kvm_steal_time_set_preempted() again to fix xen part. Fixes: 30b5c851af7 ("KVM: x86/xen: Add support for vCPU runstate information") Signed-off-by: Wanpeng Li Message-Id: <1619166200-9215-1-git-send-email-wanpengli@tencent.com> Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eca63625aee4..ee0dc58ac3a5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4025,7 +4025,6 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) { struct kvm_host_map map; struct kvm_steal_time *st; - int idx; if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; @@ -4033,15 +4032,9 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) if (vcpu->arch.st.preempted) return; - /* - * Take the srcu lock as memslots will be accessed to check the gfn - * cache generation against the memslots generation. - */ - idx = srcu_read_lock(&vcpu->kvm->srcu); - if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map, &vcpu->arch.st.cache, true)) - goto out; + return; st = map.hva + offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS); @@ -4049,20 +4042,25 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED; kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true); - -out: - srcu_read_unlock(&vcpu->kvm->srcu, idx); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + int idx; + if (vcpu->preempted && !vcpu->arch.guest_state_protected) vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu); + /* + * Take the srcu lock as memslots will be accessed to check the gfn + * cache generation against the memslots generation. + */ + idx = srcu_read_lock(&vcpu->kvm->srcu); if (kvm_xen_msr_enabled(vcpu->kvm)) kvm_xen_runstate_set_preempted(vcpu); else kvm_steal_time_set_preempted(vcpu); + srcu_read_unlock(&vcpu->kvm->srcu, idx); static_call(kvm_x86_vcpu_put)(vcpu); vcpu->arch.last_host_tsc = rdtsc(); From 9da29c7f77cd04e5c9150e30f047521b6f20a918 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 23 Apr 2021 14:28:51 -0700 Subject: [PATCH 176/182] coda: fix reference counting in coda_file_mmap error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mmap_region() now calls fput() on the vma->vm_file. So we need to drop the extra reference on the coda file instead of the host file. Link: https://lkml.kernel.org/r/20210421132012.82354-1-christian.koenig@amd.com Fixes: 1527f926fd04 ("mm: mmap: fix fput in error path v2") Signed-off-by: Christian König Reviewed-by: Daniel Vetter Acked-by: Jan Harkes Cc: Miklos Szeredi Cc: Jason Gunthorpe Cc: [5.11+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/coda/file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/coda/file.c b/fs/coda/file.c index 128d63df5bfb..ef5ca22bfb3e 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -175,10 +175,10 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma) ret = call_mmap(vma->vm_file, vma); if (ret) { - /* if call_mmap fails, our caller will put coda_file so we - * should drop the reference to the host_file that we got. + /* if call_mmap fails, our caller will put host_file so we + * should drop the reference to the coda_file that we got. */ - fput(host_file); + fput(coda_file); kfree(cvm_ops); } else { /* here we add redirects for the open/close vm_operations */ From 2896900e22f8212606a1837d89a6bbce314ceeda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 23 Apr 2021 14:28:54 -0700 Subject: [PATCH 177/182] ovl: fix reference counting in ovl_mmap error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mmap_region() now calls fput() on the vma->vm_file. Fix this by using vma_set_file() so it doesn't need to be handled manually here any more. Link: https://lkml.kernel.org/r/20210421132012.82354-2-christian.koenig@amd.com Fixes: 1527f926fd04 ("mm: mmap: fix fput in error path v2") Signed-off-by: Christian König Reviewed-by: Daniel Vetter Cc: Jan Harkes Cc: Miklos Szeredi Cc: Jason Gunthorpe Cc: [5.11+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/overlayfs/file.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index dbfb35fb0ff7..3847cdc069b5 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -430,20 +430,11 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma) if (WARN_ON(file != vma->vm_file)) return -EIO; - vma->vm_file = get_file(realfile); + vma_set_file(vma, realfile); old_cred = ovl_override_creds(file_inode(file)->i_sb); ret = call_mmap(vma->vm_file, vma); revert_creds(old_cred); - - if (ret) { - /* Drop reference count from new vm_file value */ - fput(realfile); - } else { - /* Drop reference count from previous vm_file value */ - fput(file); - } - ovl_file_accessed(file); return ret; From 2d11e738151d6cd321dd944cefe9c941ea00086c Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 23 Apr 2021 14:28:57 -0700 Subject: [PATCH 178/182] mm/filemap: fix find_lock_entries hang on 32-bit THP No problem on 64-bit, or without huge pages, but xfstests generic/308 hung uninterruptibly on 32-bit huge tmpfs. Since commit 0cc3b0ec23ce ("Clarify (and fix) in 4.13 MAX_LFS_FILESIZE macros"), MAX_LFS_FILESIZE is only a PAGE_SIZE away from wrapping 32-bit xa_index to 0, so the new find_lock_entries() has to be extra careful when handling a THP. Link: https://lkml.kernel.org/r/alpine.LSU.2.11.2104211735430.3299@eggly.anvils Fixes: 5c211ba29deb ("mm: add and use find_lock_entries") Signed-off-by: Hugh Dickins Cc: Matthew Wilcox Cc: William Kucharski Cc: Christoph Hellwig Cc: Jan Kara Cc: Dave Chinner Cc: Johannes Weiner Cc: "Kirill A. Shutemov" Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/filemap.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 43700480d897..bcf64e92ffb0 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1969,8 +1969,14 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, put: put_page(page); next: - if (!xa_is_value(page) && PageTransHuge(page)) - xas_set(&xas, page->index + thp_nr_pages(page)); + if (!xa_is_value(page) && PageTransHuge(page)) { + unsigned int nr_pages = thp_nr_pages(page); + + /* Final THP may cross MAX_LFS_FILESIZE on 32-bit */ + xas_set(&xas, page->index + nr_pages); + if (xas.xa_index < nr_pages) + break; + } } rcu_read_unlock(); From ed98b0159fa92a22a2838bd92522b8c8d964556b Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 23 Apr 2021 14:29:00 -0700 Subject: [PATCH 179/182] mm/filemap: fix mapping_seek_hole_data on THP & 32-bit No problem on 64-bit, or without huge pages, but xfstests generic/285 and other SEEK_HOLE/SEEK_DATA tests have regressed on huge tmpfs, and on 32-bit architectures, with the new mapping_seek_hole_data(). Several different bugs turned out to need fixing. u64 cast to stop losing bits when converting unsigned long to loff_t (and let's use shifts throughout, rather than mixed with * and /). Use round_up() when advancing pos, to stop assuming that pos was already THP-aligned when advancing it by THP-size. (This use of round_up() assumes that any THP has THP-aligned index: true at present and true going forward, but could be recoded to avoid the assumption.) Use xas_set() when iterating away from a THP, so that xa_index stays in synch with start, instead of drifting away to return bogus offset. Check start against end to avoid wrapping 32-bit xa_index to 0 (and to handle these additional cases, seek_data or not, it's easier to break the loop than goto: so rearrange exit from the function). [hughd@google.com: remove unneeded u64 casts, per Matthew] Link: https://lkml.kernel.org/r/alpine.LSU.2.11.2104221347240.1170@eggly.anvils Link: https://lkml.kernel.org/r/alpine.LSU.2.11.2104211737410.3299@eggly.anvils Fixes: 41139aa4c3a3 ("mm/filemap: add mapping_seek_hole_data") Signed-off-by: Hugh Dickins Cc: Christoph Hellwig Cc: Dave Chinner Cc: Jan Kara Cc: Johannes Weiner Cc: "Kirill A. Shutemov" Cc: Matthew Wilcox Cc: William Kucharski Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/filemap.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index bcf64e92ffb0..6ce832dc59e7 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2678,7 +2678,7 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start, loff_t end, int whence) { XA_STATE(xas, &mapping->i_pages, start >> PAGE_SHIFT); - pgoff_t max = (end - 1) / PAGE_SIZE; + pgoff_t max = (end - 1) >> PAGE_SHIFT; bool seek_data = (whence == SEEK_DATA); struct page *page; @@ -2687,7 +2687,8 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start, rcu_read_lock(); while ((page = find_get_entry(&xas, max, XA_PRESENT))) { - loff_t pos = xas.xa_index * PAGE_SIZE; + loff_t pos = (u64)xas.xa_index << PAGE_SHIFT; + unsigned int seek_size; if (start < pos) { if (!seek_data) @@ -2695,25 +2696,25 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start, start = pos; } - pos += seek_page_size(&xas, page); + seek_size = seek_page_size(&xas, page); + pos = round_up(pos + 1, seek_size); start = page_seek_hole_data(&xas, mapping, page, start, pos, seek_data); if (start < pos) goto unlock; + if (start >= end) + break; + if (seek_size > PAGE_SIZE) + xas_set(&xas, pos >> PAGE_SHIFT); if (!xa_is_value(page)) put_page(page); } - rcu_read_unlock(); - if (seek_data) - return -ENXIO; - goto out; - + start = -ENXIO; unlock: rcu_read_unlock(); - if (!xa_is_value(page)) + if (page && !xa_is_value(page)) put_page(page); -out: if (start > end) return end; return start; From 1974c45dd7745e999b9387be3d8fdcb27a5b1721 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 23 Apr 2021 14:29:03 -0700 Subject: [PATCH 180/182] tools/cgroup/slabinfo.py: updated to work on current kernel slabinfo.py script does not work with actual kernel version. First, it was unable to recognise SLUB susbsytem, and when I specified it manually it failed again with AttributeError: 'struct page' has no member 'obj_cgroups' .. and then again with File "tools/cgroup/memcg_slabinfo.py", line 221, in main memcg.kmem_caches.address_of_(), AttributeError: 'struct mem_cgroup' has no member 'kmem_caches' Link: https://lkml.kernel.org/r/cec1a75e-43b4-3d64-2084-d9f98fda037f@virtuozzo.com Signed-off-by: Vasily Averin Tested-by: Roman Gushchin Acked-by: Roman Gushchin Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/cgroup/memcg_slabinfo.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/cgroup/memcg_slabinfo.py b/tools/cgroup/memcg_slabinfo.py index c4225ed63565..1600b17dbb8a 100644 --- a/tools/cgroup/memcg_slabinfo.py +++ b/tools/cgroup/memcg_slabinfo.py @@ -128,9 +128,9 @@ def detect_kernel_config(): cfg['nr_nodes'] = prog['nr_online_nodes'].value_() - if prog.type('struct kmem_cache').members[1][1] == 'flags': + if prog.type('struct kmem_cache').members[1].name == 'flags': cfg['allocator'] = 'SLUB' - elif prog.type('struct kmem_cache').members[1][1] == 'batchcount': + elif prog.type('struct kmem_cache').members[1].name == 'batchcount': cfg['allocator'] = 'SLAB' else: err('Can\'t determine the slab allocator') @@ -193,7 +193,7 @@ def main(): # look over all slab pages, belonging to non-root memcgs # and look for objects belonging to the given memory cgroup for page in for_each_slab_page(prog): - objcg_vec_raw = page.obj_cgroups.value_() + objcg_vec_raw = page.memcg_data.value_() if objcg_vec_raw == 0: continue cache = page.slab_cache @@ -202,7 +202,7 @@ def main(): addr = cache.value_() caches[addr] = cache # clear the lowest bit to get the true obj_cgroups - objcg_vec = Object(prog, page.obj_cgroups.type_, + objcg_vec = Object(prog, 'struct obj_cgroup **', value=objcg_vec_raw & ~1) if addr not in stats: From 799bac5512188522213e2d7eb78ca7094dfdf30c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 24 Apr 2021 09:32:35 -0700 Subject: [PATCH 181/182] Revert "net/rds: Avoid potential use after free in rds_send_remove_from_sock" This reverts commit 0c85a7e87465f2d4cbc768e245f4f45b2f299b05. The games with 'rm' are on (two separate instances) of a local variable, and make no difference. Quoting Aditya Pakki: "I was the author of the patch and it was the cause of the giant UMN revert. The patch is garbage and I was unaware of the steps involved in retracting it. I *believed* the maintainers would pull it, given it was already under Greg's list. The patch does not introduce any bugs but is pointless and is stupid. I accept my incompetence and for not requesting a revert earlier." Link: https://lwn.net/Articles/854319/ Requested-by: Aditya Pakki Cc: Santosh Shilimkar Cc: David S. Miller Cc: Al Viro Signed-off-by: Linus Torvalds --- net/rds/message.c | 1 - net/rds/send.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/net/rds/message.c b/net/rds/message.c index 4fc66ff0f1ec..799034e0f513 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -180,7 +180,6 @@ void rds_message_put(struct rds_message *rm) rds_message_purge(rm); kfree(rm); - rm = NULL; } } EXPORT_SYMBOL_GPL(rds_message_put); diff --git a/net/rds/send.c b/net/rds/send.c index fe5264b9d4b3..985d0b7713ac 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -665,7 +665,7 @@ static void rds_send_remove_from_sock(struct list_head *messages, int status) unlock_and_drop: spin_unlock_irqrestore(&rm->m_rs_lock, flags); rds_message_put(rm); - if (was_on_sock && rm) + if (was_on_sock) rds_message_put(rm); } From 9f4ad9e425a1d3b6a34617b8ea226d56a119a717 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 25 Apr 2021 13:49:08 -0700 Subject: [PATCH 182/182] Linux 5.12 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bc19584fee59..3a10a8e08b6d 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc8 +EXTRAVERSION = NAME = Frozen Wasteland # *DOCUMENTATION*